library(glmnet)
Loaded glmnet 4.1-2
#Load in People, Batting, and Pitching Dataframes
data("People") 
data("Batting")
data("Pitching")
#Merges player name to Batting data. 
bstats <- battingStats()
    str(bstats)
'data.frame':   108789 obs. of  29 variables:
 $ playerID: chr  "abercda01" "addybo01" "allisar01" "allisdo01" ...
 $ yearID  : int  1871 1871 1871 1871 1871 1871 1871 1871 1871 1871 ...
 $ stint   : int  1 1 1 1 1 1 1 1 1 1 ...
 $ teamID  : Factor w/ 149 levels "ALT","ANA","ARI",..: 136 111 39 142 111 56 111 24 56 24 ...
 $ lgID    : Factor w/ 7 levels "AA","AL","FL",..: 4 4 4 4 4 4 4 4 4 4 ...
 $ G       : int  1 25 29 27 25 12 1 31 1 18 ...
 $ AB      : int  4 118 137 133 120 49 4 157 5 86 ...
 $ R       : int  0 30 28 28 29 9 0 66 1 13 ...
 $ H       : int  0 32 40 44 39 11 1 63 1 13 ...
 $ X2B     : int  0 6 4 10 11 2 0 10 1 2 ...
 $ X3B     : int  0 0 5 2 3 1 0 9 0 1 ...
 $ HR      : int  0 0 0 2 0 0 0 0 0 0 ...
 $ RBI     : int  0 13 19 27 16 5 2 34 1 11 ...
 $ SB      : int  0 8 3 1 6 0 0 11 0 1 ...
 $ CS      : int  0 1 1 1 2 1 0 6 0 0 ...
 $ BB      : int  0 4 2 0 2 0 1 13 0 0 ...
 $ SO      : int  0 0 5 2 1 1 0 1 0 0 ...
 $ IBB     : int  NA NA NA NA NA NA NA NA NA NA ...
 $ HBP     : int  NA NA NA NA NA NA NA NA NA NA ...
 $ SH      : int  NA NA NA NA NA NA NA NA NA NA ...
 $ SF      : int  NA NA NA NA NA NA NA NA NA NA ...
 $ GIDP    : int  0 0 1 0 0 0 0 1 0 0 ...
 $ BA      : num  0 0.271 0.292 0.331 0.325 0.224 0.25 0.401 0.2 0.151 ...
 $ PA      : num  4 122 139 133 122 49 5 170 5 86 ...
 $ TB      : num  0 38 54 64 56 15 1 91 2 17 ...
 $ SlugPct : num  0 0.322 0.394 0.481 0.467 0.306 0.25 0.58 0.4 0.198 ...
 $ OBP     : num  0 0.295 0.302 0.331 0.336 0.224 0.4 0.447 0.2 0.151 ...
 $ OPS     : num  0 0.617 0.696 0.812 0.803 ...
 $ BABIP   : num  0 0.271 0.303 0.326 0.328 0.229 0.25 0.404 0.2 0.151 ...
    

People$name <- paste(People$nameFirst, People$nameLast, sep = " ")

batting_name <- merge(Batting,
                 People[,c("playerID", "name")],
                 by = "playerID", all.x = TRUE)

#Merges player name to Pitching data.

People$name <- paste(People$nameFirst, People$nameLast, sep = " ")

pitching_name <- merge(Pitching,
                 People[,c("playerID", "name")],
                 by = "playerID", all.x = TRUE)
#Creating additional stats for bstats
bstats[is.na(bstats)] = 0
#is.nan(bstats)

bstats <- bstats %>%
  mutate(K_Percent = SO / PA) %>%
  mutate(BB_Percent = (BB + IBB) / PA) %>%
  mutate_all(~replace(., is.nan(.), 0))
invalid factor level, NA generatedinvalid factor level, NA generated
bstats <- bstats %>%
  mutate_at(vars(K_Percent, BB_Percent), funs(round(., 3)))
`funs()` was deprecated in dplyr 0.8.0.
Please use a list of either functions or lambdas: 

  # Simple named list: 
  list(mean = mean, median = median)

  # Auto named with `tibble::lst()`: 
  tibble::lst(mean, median)

  # Using lambdas
  list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
bstats_salary <- bstats %>%
              filter(yearID >= 1985) %>%
              left_join(select(Salaries, playerID, yearID, teamID, salary), 
                         by=c("playerID", "yearID", "teamID"))

bstats_salary[is.na(bstats_salary)] = 0
str(bstats_salary)
'data.frame':   46535 obs. of  32 variables:
 $ playerID  : chr  "aasedo01" "abregjo01" "ackerji01" "adamsri02" ...
 $ yearID    : num  1985 1985 1985 1985 1985 ...
 $ stint     : num  1 1 1 1 1 1 1 1 1 1 ...
 $ teamID    : Factor w/ 149 levels "ALT","ANA","ARI",..: 5 35 134 117 33 102 94 134 134 134 ...
 $ lgID      : Factor w/ 7 levels "AA","AL","FL",..: 2 5 2 5 2 5 5 2 2 2 ...
 $ G         : num  54 6 61 54 54 91 22 12 36 14 ...
 $ AB        : num  0 9 0 121 0 165 36 20 0 34 ...
 $ R         : num  0 0 0 12 0 27 1 2 0 2 ...
 $ H         : num  0 0 0 23 0 46 10 4 0 4 ...
 $ X2B       : num  0 0 0 3 0 7 2 1 0 1 ...
 $ X3B       : num  0 0 0 1 0 3 0 0 0 0 ...
 $ HR        : num  0 0 0 2 0 6 0 1 0 0 ...
 $ RBI       : num  0 1 0 10 0 21 2 5 0 3 ...
 $ SB        : num  0 0 0 1 0 1 0 0 0 0 ...
 $ CS        : num  0 0 0 1 0 0 0 0 0 0 ...
 $ BB        : num  0 0 0 5 0 22 1 3 0 0 ...
 $ SO        : num  0 2 0 23 0 26 5 6 0 10 ...
 $ IBB       : num  0 0 0 3 0 5 0 0 0 0 ...
 $ HBP       : num  0 0 0 1 0 6 0 0 0 0 ...
 $ SH        : num  0 0 0 3 0 4 7 0 0 0 ...
 $ SF        : num  0 0 0 0 0 3 0 1 0 0 ...
 $ GIDP      : num  0 0 0 2 0 7 1 1 0 1 ...
 $ BA        : num  0 0 0 0.19 0 0.279 0.278 0.2 0 0.118 ...
 $ PA        : num  0 9 0 130 0 200 44 24 0 34 ...
 $ TB        : num  0 0 0 34 0 77 12 8 0 5 ...
 $ SlugPct   : num  0 0 0 0.281 0 0.467 0.333 0.4 0 0.147 ...
 $ OBP       : num  0 0 0 0.228 0 0.378 0.297 0.292 0 0.118 ...
 $ OPS       : num  0 0 0 0.509 0 0.845 0.63 0.692 0 0.265 ...
 $ BABIP     : num  0 0 0 0.219 0 0.294 0.323 0.214 0 0.167 ...
 $ K_Percent : num  0 0.222 0 0.177 0 0.13 0.114 0.25 0 0.294 ...
 $ BB_Percent: num  0 0 0 0.062 0 0.135 0.023 0.125 0 0 ...
 $ salary    : num  0 0 170000 0 147500 ...
bstats_sure <- bstats_salary %>%
  filter(PA > 150) %>%
  select(OPS, BABIP, K_Percent, BB_Percent, salary)

Data Preparation (Lesson 1 & 2)

#Keep players with over 150 at bats. (We can change this value if necessary).
#Creating batting average variable.

batting1 <- bstats %>%
  filter(AB >= 150)
  
bstats %>%
  filter(playerID == "bogaexa01")

Exploratory Analysis (Lesson 1 & 2)

Lessons 1 and 2 will just be parts of the overall project. Simple things like data manipulation, apply functions, boxplots, etc. This will be data preparation items and exploratory analysis.

b <- ggplot(batting1, aes(x = teamID, y = HR)) +
  geom_boxplot(col = "black", aes(fill = teamID))
b

hitters1 <- batting1 %>%
  filter(yearID < 1895) %>%
  select(SlugPct)

hitters2 <- batting1 %>%
  filter(yearID > 1894, yearID < 1921) %>%
  select(SlugPct)

hitters3 <- batting1 %>%
  filter(yearID > 1920, yearID < 1969) %>%
  select(SlugPct)

hitters4 <- batting1 %>%
  filter(yearID > 1969) %>%
  select(SlugPct)
#Organizing 4 different datasets looking at slugging percentage for the following boxplots. All of these are somewhat different eras, with the most dramatic split being from before 1920 (pre-Babe Ruth) and after 1920 (during and post-Babe Ruth)
boxplot(hitters1,
        main = "Slugging percentage from late 1871 - 1894",
        ylab = "Slugging percentage",
        col = "blue",
        horizontal = TRUE)

boxplot(hitters2, 
        main = "Slugging percentage from 1895-1920",
        ylab = "Slugging percentage",
        col = "yellow",
        horizontal = TRUE)

boxplot(hitters3, 
        main = "Slugging percentage from 1921-1968",
        ylab = "Slugging percentage",
        col = "red",
        horizontal = TRUE)

boxplot(hitters4, 
        main = "Slugging percentage from 1969 - present",
        ylab = "Slugging percentage",
        col = "red",
        horizontal = TRUE)

sapply(hitters1, mean, na.rm = T)
  SlugPct 
0.3456088 
sapply(hitters2, mean, na.rm = T)
 SlugPct 
0.348923 
sapply(hitters3, mean, na.rm = T)
  SlugPct 
0.3972127 
sapply(hitters4, mean, na.rm = T)
  SlugPct 
0.4088045 
#Notice that gigantic increase between hitters2 and hitters3
summary(hitters1)
    SlugPct      
 Min.   :0.1220  
 1st Qu.:0.2900  
 Median :0.3380  
 Mean   :0.3456  
 3rd Qu.:0.3970  
 Max.   :0.6960  
summary(hitters2)
    SlugPct      
 Min.   :0.1480  
 1st Qu.:0.3003  
 Median :0.3430  
 Mean   :0.3489  
 3rd Qu.:0.3910  
 Max.   :0.8490  
summary(hitters3)
    SlugPct      
 Min.   :0.1760  
 1st Qu.:0.3420  
 Median :0.3900  
 Mean   :0.3972  
 3rd Qu.:0.4440  
 Max.   :0.8460  
summary(hitters4)
    SlugPct      
 Min.   :0.1730  
 1st Qu.:0.3540  
 Median :0.4040  
 Mean   :0.4088  
 3rd Qu.:0.4580  
 Max.   :0.8630  
#Keep batting stats that we want for pairs.
batting_num <- bstats %>%
  filter(PA >= 150) %>%
  select("BA", 'OBP', 'SlugPct', "SO", "BB", "HR")
  
pairs(batting_num)

Career Batting Stats

careerBatting <- na.omit(bstats)
careerBatting <- careerBatting %>%
  select(playerID, BA, PA, SlugPct, OBP, SO, HR) %>%
  group_by(playerID) %>%
  summarise_all('mean')
careerBatting_num <- careerBatting %>%
  filter(PA >= 150) %>%
  select(BA, PA, SlugPct, OBP, SO, HR)

pairs(careerBatting_num)

corrmatrix <- cor(batting_num)
corrplot(corrmatrix, method = 'number') #Gives us correlation from pairs graph.

careerBatting_num1 <- careerBatting_num %>%
  filter(PA > 500)

0-dimensional Reduction (Lesson 4)

Bootstrapping

PCA (Lesson 4)

res <- batting_num %>% prcomp(scale = TRUE)
res
Standard deviations (1, .., p=6):
[1] 1.8624983 1.1955799 0.8163046 0.5272521 0.3234188 0.2296540

Rotation (n x k) = (6 x 6):
               PC1         PC2         PC3         PC4        PC5          PC6
BA      -0.3736490  0.53149382  0.20948811 -0.39409469  0.6134310  0.049063667
OBP     -0.4412694  0.38795844 -0.30295510 -0.06651166 -0.5817204  0.469217735
SlugPct -0.4816546  0.08527252  0.45916589  0.20230952 -0.3441137 -0.624948649
SO      -0.2974863 -0.61917967  0.04176753 -0.71554909 -0.1194610  0.009617743
BB      -0.4043725 -0.14520286 -0.75150469  0.19652707  0.2909420 -0.356888661
HR      -0.4262175 -0.39403532  0.29495049  0.49870136  0.2607132  0.509317820
loadings <- res$rotation
loadings
               PC1         PC2         PC3         PC4        PC5          PC6
BA      -0.3736490  0.53149382  0.20948811 -0.39409469  0.6134310  0.049063667
OBP     -0.4412694  0.38795844 -0.30295510 -0.06651166 -0.5817204  0.469217735
SlugPct -0.4816546  0.08527252  0.45916589  0.20230952 -0.3441137 -0.624948649
SO      -0.2974863 -0.61917967  0.04176753 -0.71554909 -0.1194610  0.009617743
BB      -0.4043725 -0.14520286 -0.75150469  0.19652707  0.2909420 -0.356888661
HR      -0.4262175 -0.39403532  0.29495049  0.49870136  0.2607132  0.509317820
score_mat <- res$x
score_mat
                   PC1           PC2           PC3           PC4           PC5           PC6
    [1,] -2.416723e+00  4.560698e+00  1.443027e+00 -7.333664e-01 -4.234072e-01 -1.762980e-01
    [2,]  1.219725e+00  1.849015e+00  8.201252e-01 -1.952915e-01  3.981469e-01  1.822513e-01
    [3,]  1.474218e+00  7.482123e-01  9.141729e-01  8.396478e-01 -2.439173e-01 -5.416897e-01
    [4,]  6.203888e-01  2.304795e+00  1.057469e+00 -2.640564e-01  2.662469e-01  3.072002e-02
    [5,]  2.943669e+00  3.660013e-01  5.506029e-01  4.155864e-01  5.317398e-01 -3.286813e-02
    [6,]  1.777087e+00  1.308355e+00  8.996221e-01  1.318660e-01  3.669270e-01 -1.200040e-01
    [7,]  1.638294e+00  1.365860e+00  9.687942e-01  2.460817e-01  2.518894e-01 -2.846689e-01
    [8,]  1.277859e+00  1.429171e+00  1.237701e+00  3.160011e-01  2.384888e-01 -3.187137e-01
    [9,]  2.575716e+00  7.738965e-01  5.169331e-01  3.002796e-01  5.345135e-01  1.057655e-01
   [10,]  1.406601e+00  1.626801e+00  1.019207e+00  6.878197e-02  3.105486e-01 -1.622986e-01
   [11,]  2.314598e+00  1.032260e+00  3.284764e-01  1.875309e-01  4.737664e-01  3.377658e-01
   [12,] -2.254698e+00  4.915125e+00  1.951355e+00 -1.215630e+00  1.700315e-01  1.075539e-01
   [13,]  1.714346e+00  1.191014e+00  1.054853e+00  3.211466e-01  2.940316e-01 -2.832409e-01
   [14,]  4.214521e+00 -5.515752e-01  3.050290e-01  7.087223e-01  7.976724e-01  3.980669e-02
   [15,]  2.271122e+00  1.078424e+00  5.187903e-01  1.175299e-01  5.660790e-01  2.481744e-01
   [16,]  7.655863e-01  2.041193e+00  1.014612e+00  1.844765e-02  5.616512e-02 -1.959685e-01
   [17,]  1.520407e+00  1.432379e+00  1.091449e+00  2.042299e-01  2.334932e-01 -3.524935e-01
   [18,]  1.490633e+00  1.291377e+00  6.930873e-01  3.470345e-01  6.483829e-02 -9.133176e-02
   [19,] -1.993183e-01  3.201744e+00  1.191853e+00 -5.892269e-01  4.491340e-01  4.036008e-01
   [20,]  3.770710e-01  2.434379e+00  7.232593e-01 -1.258865e-01  2.812814e-02  1.074989e-01
   [21,]  2.922852e-01  2.390471e+00  1.332478e+00 -8.020927e-02  1.240978e-01 -2.261130e-01
   [22,]  7.804409e-01  2.075181e+00  2.583384e-01 -8.693810e-02  3.394759e-03  4.109240e-01
   [23,]  1.371866e+00  9.825359e-01  9.688047e-01  7.256051e-01 -2.492550e-01 -5.812617e-01
   [24,] -2.382359e+00  4.714902e+00  1.062824e+00 -1.046896e+00 -3.540856e-02  3.003221e-01
   [25,]  1.292267e+00  2.096809e+00  6.511071e-01 -4.066068e-01  6.834969e-01  5.933871e-01
   [26,] -2.777582e+00  4.944257e+00  1.755964e+00 -1.083355e+00 -5.785753e-02  1.077638e-02
   [27,]  1.190319e+00  1.795224e+00  7.511231e-01 -9.836268e-02  3.912924e-01  6.458790e-02
   [28,]  2.138132e+00  7.503777e-01  5.815927e-01  2.363811e-01  3.601793e-01  5.370821e-02
   [29,]  2.204313e+00  9.435450e-01  5.069415e-01  1.122817e-01  4.312682e-01  1.689245e-01
   [30,]  2.534510e+00  9.701706e-01  6.262459e-01  7.652271e-02  7.558703e-01  2.656094e-01
   [31,]  1.554444e+00  1.436907e+00  1.008768e+00  9.023136e-02  5.609106e-01  1.073257e-01
   [32,]  1.819823e+00  1.362150e+00  6.227748e-01  4.177417e-02  4.474395e-01  1.647032e-01
   [33,]  3.601595e+00 -6.392722e-01  4.603145e-01  4.955963e-01  5.099287e-01 -2.749128e-01
   [34,]  2.645202e-01  2.481165e+00  9.207404e-01 -6.058332e-01  4.796779e-01  3.919094e-01
   [35,]  3.421297e+00  1.147581e-02  4.038722e-01  5.460176e-01  6.062009e-01  1.169545e-02
   [36,]  2.471482e-01  2.481274e+00  8.898948e-01 -5.414952e-01  3.639352e-01  1.736046e-01
   [37,]  2.150533e+00  1.295666e+00  5.544389e-01  1.153722e-02  6.516006e-01  3.546918e-01
   [38,]  3.006876e+00  8.594070e-03  8.192747e-01  5.894401e-01  4.886067e-01 -3.422591e-01
   [39,]  1.854183e-02  2.884613e+00  1.174663e+00 -5.676400e-01  3.734326e-01  1.501689e-01
   [40,]  1.393717e+00  1.754510e+00  7.957141e-01 -1.746703e-01  5.928169e-01  3.670016e-01
   [41,]  2.260195e+00  6.986164e-01  1.076252e+00  4.301822e-01  3.090228e-01 -5.521915e-01
   [42,]  5.987150e-02  2.642965e+00  1.483975e+00 -2.125679e-01  2.386474e-01 -2.420331e-01
   [43,]  4.316153e-01  2.249363e+00  8.628850e-01 -2.927017e-01  3.697471e-01  1.355797e-01
   [44,]  1.114305e+00  1.925993e+00  7.971480e-01 -1.876282e-01  4.587106e-01  1.283862e-01
   [45,]  1.854586e-01  2.651860e+00  1.294539e+00 -4.079149e-01  5.125728e-01  2.486702e-01
   [46,]  1.893873e-01  2.772122e+00  1.357466e+00 -4.803023e-01  5.973779e-01  3.360247e-01
   [47,]  7.292379e-01  1.737208e+00 -2.609344e-01 -9.363625e-02 -4.368283e-02  4.379741e-01
   [48,]  1.847550e+00  1.276145e+00  7.167369e-01 -2.892983e-02  5.097654e-01  1.042807e-01
   [49,]  3.626601e+00 -8.547987e-02  2.693330e-01  4.713822e-01  7.559704e-01  1.828194e-01
   [50,]  1.930785e+00  1.394545e+00  6.893944e-01 -6.183811e-02  6.532000e-01  2.401690e-01
   [51,] -6.078538e-02  3.176777e+00  1.211221e+00 -6.759564e-01  4.986540e-01  3.306064e-01
   [52,]  2.339012e+00  6.987610e-01  1.334819e-01  2.047737e-01  4.287504e-01  2.192671e-01
   [53,]  9.842678e-01  2.225258e+00  1.096110e+00 -3.051193e-01  5.516512e-01  1.454291e-01
   [54,]  1.455407e+00  1.672343e+00  9.417622e-01 -6.577560e-02  4.895289e-01 -1.238487e-03
   [55,]  4.300761e+00 -9.084043e-01  2.143078e-01  9.553783e-01  6.595707e-01 -6.440636e-02
   [56,]  4.710233e-01  1.423581e+00  1.662245e+00  4.631328e-01  2.677690e-01 -3.133157e-01
   [57,]  1.649192e+00  1.423337e+00  1.162990e+00  7.234994e-02  5.394208e-01 -1.026508e-01
   [58,]  2.768852e+00  1.478174e-01  8.899297e-01  5.451073e-01  3.915213e-01 -3.815478e-01
   [59,]  2.099573e+00  1.023482e+00  9.221362e-01  1.216150e-01  6.103257e-01  1.984430e-02
   [60,]  1.569637e+00  1.275754e+00  1.234741e+00 -7.314692e-03  4.644239e-01 -2.085609e-01
   [61,] -1.498821e-01  3.080767e+00  1.334832e+00 -5.264526e-01  3.427686e-01  4.359203e-02
   [62,]  2.568969e+00  9.856887e-01  6.948187e-01  8.865470e-02  7.938021e-01  2.354448e-01
   [63,]  1.673655e+00  9.690939e-01  8.796657e-01  1.193615e-01  4.153881e-01 -3.410592e-02
   [64,] -4.853341e-02  2.629401e+00  1.554874e+00 -1.519071e-01  2.415414e-01 -2.272155e-01
   [65,]  2.410371e+00  7.924908e-01  2.061873e-01  2.658986e-01  4.473866e-01  2.400031e-01
   [66,]  2.280413e+00  9.146890e-01  8.091815e-01  2.796219e-01  4.894963e-01 -1.584045e-01
   [67,] -2.505357e-01  3.082187e+00  1.431683e+00 -4.758327e-01  3.200074e-01  8.442382e-02
   [68,] -1.108737e+00  4.083915e+00  1.210488e+00 -1.059773e+00  4.674161e-01  5.476804e-01
   [69,]  2.146383e+00  1.108095e+00  5.400660e-01  1.272634e-01  6.767289e-01  3.062165e-01
   [70,] -3.332913e+00  5.029997e+00  1.529292e+00 -8.384134e-01 -1.736625e-01 -2.189377e-01
   [71,]  1.917417e+00  1.118149e+00  1.174363e+00  2.949981e-01  3.501859e-01 -4.599910e-01
   [72,]  2.510081e+00  4.956159e-01  4.078296e-01  5.372300e-01  4.147258e-01 -5.399929e-02
   [73,]  1.842099e+00  1.296739e+00  6.460042e-01 -8.950588e-02  5.084560e-01  2.165966e-01
   [74,]  1.946117e+00  1.088249e+00  9.342168e-01  1.443770e-01  5.197305e-01 -4.137615e-02
   [75,]  4.010638e+00 -9.043144e-01 -9.277337e-02  5.143939e-01  5.910971e-01  1.226830e-01
   [76,]  2.317837e+00  5.746190e-01  5.556606e-01  4.191115e-01  4.862667e-01  5.763530e-02
   [77,]  3.074563e-01  2.534392e+00  1.401686e+00 -3.824038e-01  4.268553e-01 -1.232876e-03
   [78,]  1.167005e+00  1.809348e+00  1.129422e+00 -4.721470e-02  4.501069e-01 -2.529551e-02
   [79,]  1.632560e+00  1.468100e+00  9.918127e-01  1.531525e-03  4.588732e-01 -9.019922e-02
   [80,]  2.741739e+00  5.302697e-01  3.604595e-01  2.144569e-01  5.750893e-01  2.100966e-01
   [81,]  1.971533e+00  1.017793e+00  9.299388e-01  1.804371e-01  6.125357e-01  6.301301e-02
   [82,]  2.721711e+00  2.680274e-01  2.327942e-02  2.907707e-01  3.877542e-01  2.068869e-01
   [83,]  2.135547e-01  2.706228e+00  1.040692e+00 -4.148453e-01  3.578101e-01  1.113258e-01
   [84,]  2.376955e+00  6.755834e-01  6.517979e-01  1.401077e-01  4.918054e-01 -3.603275e-02
   [85,]  2.360438e+00  7.127484e-01  6.694583e-01  2.299599e-01  5.813277e-01  5.283155e-02
   [86,]  4.128479e-02  2.776277e+00  1.415481e+00 -3.831971e-01  3.556296e-01 -7.593776e-03
   [87,] -4.389638e-01  3.392092e+00  8.501625e-01 -7.063879e-01  3.920052e-01  4.741925e-01
   [88,]  1.770532e+00  1.189871e+00  1.037435e+00  1.423073e-01  4.706187e-01 -1.423005e-01
   [89,]  1.930187e+00  9.620627e-01  7.382488e-01  2.714791e-01  3.795639e-01 -9.154777e-02
   [90,]  2.155863e+00  9.065004e-01  8.945369e-01  3.965488e-01  4.249762e-01 -1.818956e-01
   [91,]  1.523907e-01  2.862417e+00  1.276905e+00 -4.713282e-01  3.751188e-01  9.063007e-02
   [92,]  9.995124e-01  1.823088e+00  1.170911e+00  1.344113e-02  4.476761e-01 -1.903802e-02
   [93,]  1.110998e+00  2.110401e+00  9.938894e-01 -2.666306e-01  5.675575e-01  1.759913e-01
   [94,]  5.192421e-01  2.435532e+00  1.302713e+00 -3.091739e-01  4.550121e-01  7.071398e-02
   [95,]  7.936573e-01  2.224945e+00  1.313504e+00 -3.269706e-01  4.517846e-01 -6.770236e-02
   [96,]  8.403641e-01  2.256260e+00  1.178937e+00 -2.087666e-01  4.417500e-01 -3.706420e-02
   [97,]  1.148718e+00  1.669228e+00  2.253819e-02 -2.459589e-01  2.848365e-01  5.532400e-01
   [98,]  1.185487e+00  1.572026e+00  3.058708e-01 -8.272972e-02  2.987945e-01  1.777574e-01
   [99,]  2.380042e+00  6.606109e-01  7.417901e-01 -1.160135e-01  6.060816e-01  9.513828e-02
  [100,]  3.908648e+00 -6.824891e-01 -8.243134e-02  8.576197e-01  4.371080e-01 -5.428605e-02
  [101,]  2.174089e+00  1.264951e+00  6.906613e-01  2.511012e-02  7.083800e-01  2.293995e-01
  [102,]  1.618655e+00  1.738117e+00  8.890477e-01 -1.790972e-01  6.765123e-01  2.320327e-01
  [103,]  1.964422e+00  1.146938e+00  4.015434e-01  6.802530e-02  4.616389e-01  2.143541e-01
  [104,] -1.042642e+00  3.631385e+00  1.599374e+00 -6.467583e-01  2.788696e-01  1.557212e-01
  [105,] -3.249017e-01  2.867747e+00  1.670345e+00 -2.033250e-01  3.077568e-01 -1.065481e-01
  [106,]  1.571414e-01  2.589093e+00  1.581907e+00 -1.917588e-01  1.676164e-01 -4.610063e-01
  [107,]  3.075156e+00 -2.184416e-01  1.588215e-01  3.996932e-01  3.985028e-01  1.536229e-01
  [108,]  9.464263e-01  2.334568e+00  9.881629e-01 -4.426485e-01  6.270796e-01  3.568946e-01
  [109,] -6.112114e-01  3.051586e+00  8.299057e-01 -3.936699e-01  1.653944e-01  1.061102e-01
  [110,] -2.891211e-01  3.207652e+00  1.054573e+00 -6.829069e-01  3.826751e-01  3.203025e-01
  [111,]  1.958538e+00  1.147893e+00  4.340068e-01  1.390249e-01  5.734960e-01  2.825576e-01
  [112,]  8.340147e-02  2.055687e+00  1.394886e+00  1.639048e-01  1.670183e-01 -3.031638e-01
  [113,]  1.948431e+00  1.402737e+00  5.745996e-01 -8.495996e-02  6.732018e-01  3.401546e-01
  [114,]  1.331906e+00  1.598962e+00  1.169111e+00  7.239512e-02  4.155062e-01 -1.724547e-01
  [115,]  2.089271e+00  8.270733e-01  9.166172e-01  3.041473e-01  5.516338e-01 -6.825203e-02
  [116,]  6.652700e-01  2.346548e+00  1.092434e+00 -3.183514e-01  5.451402e-01  2.086985e-01
  [117,]  2.182842e+00  9.434587e-01  7.752312e-01  3.309184e-01  5.282321e-01 -5.022710e-02
  [118,]  5.013390e-01  2.533947e+00  1.210166e+00 -3.879506e-01  4.280099e-01  5.845775e-02
  [119,]  2.398461e+00  5.670894e-01  6.248191e-01  2.635547e-01  5.100397e-01  1.409121e-03
  [120,] -1.188190e+00  3.900769e+00  1.819585e+00 -8.126923e-01  3.070541e-01  4.488749e-02
  [121,]  2.227782e+00  1.044966e+00  9.926182e-01  1.859686e-01  5.704792e-01 -1.576408e-01
  [122,]  6.265958e-01  2.374716e+00  7.351219e-01 -2.704773e-01  3.419254e-01  2.442308e-01
  [123,] -1.527206e+00  3.792398e+00  1.498283e+00 -5.826627e-01  2.284069e-01  1.143657e-01
  [124,]  1.949358e+00  7.736311e-01  4.591902e-01  4.113739e-01  3.819606e-01  2.007939e-02
  [125,]  9.546985e-01  1.712587e+00  1.279208e+00  6.244127e-02  3.509531e-01 -2.032127e-01
  [126,]  4.175919e+00 -6.018102e-01  3.044794e-01  5.929268e-01  8.285393e-01  6.653842e-02
  [127,]  3.073146e+00  2.745347e-01  7.352831e-01  4.814719e-01  5.952238e-01 -1.992386e-01
  [128,]  1.774820e+00  1.289509e+00  6.669919e-01 -1.641910e-02  5.258865e-01  6.206168e-02
  [129,]  5.375029e-01  2.595559e+00  9.792746e-01 -4.384009e-01  5.128877e-01  2.760494e-01
  [130,]  1.598670e+00  1.663336e+00  8.385291e-01 -1.327358e-01  5.606892e-01  2.006034e-01
  [131,]  8.938875e-02  2.782653e+00  9.391116e-01 -4.292095e-01  3.250307e-01  1.562155e-01
  [132,]  4.491808e+00 -1.287511e+00  3.988355e-01  6.978455e-01  6.750664e-01 -2.784215e-01
  [133,]  3.277880e+00 -6.584339e-02  6.927470e-01  4.305486e-01  6.061131e-01 -2.223229e-01
  [134,]  1.638951e+00  1.263582e+00  1.160531e+00  2.927038e-01  3.091054e-01 -3.312571e-01
  [135,]  3.296838e+00  2.304245e-01  3.456176e-01  2.485582e-01  8.001902e-01  3.026274e-01
  [136,]  3.596579e+00 -2.680050e-01  5.883358e-01  5.459397e-01  6.462194e-01 -1.850761e-01
  [137,]  2.454556e+00  6.178760e-01  7.683971e-01  3.774372e-01  4.885282e-01 -9.600194e-02
  [138,]  3.253124e+00  2.323717e-01  4.309048e-01  3.185377e-01  7.544745e-01  1.730422e-01
  [139,]  1.970733e+00  1.018884e+00  1.053743e+00  1.557218e-01  5.270654e-01 -1.585225e-01
  [140,]  1.732010e+00  1.305955e+00  9.698273e-01  4.194498e-02  5.546218e-01  9.694220e-03
  [141,]  2.225008e+00  7.309146e-01  6.902748e-01  3.084738e-02  4.856930e-01 -3.255164e-02
  [142,]  1.097909e+00  1.474785e+00  1.536971e+00  3.588217e-01  2.872577e-01 -4.562505e-01
  [143,] -2.784413e-01  2.874435e+00  1.589095e+00 -2.842018e-01  7.354918e-02 -4.702583e-01
  [144,]  3.543882e+00 -3.235219e-01  6.006674e-01  3.081664e-01  7.149754e-01 -8.164553e-02
  [145,]  2.129066e+00  8.346168e-01  6.714373e-01  2.156418e-01  6.212377e-01  1.639034e-01
  [146,]  2.778449e+00  4.690608e-01  5.346695e-01  3.530826e-01  5.238112e-01 -1.691195e-02
  [147,]  3.318391e+00 -4.019898e-02  1.329822e-01  4.554767e-01  5.679152e-01  1.507273e-01
  [148,]  1.961713e+00  1.228795e+00  6.940204e-01 -2.294416e-01  6.582778e-01  2.966505e-01
  [149,]  6.883506e-01  2.237150e+00  1.111198e+00 -1.600857e-01  3.408241e-01 -1.358345e-01
  [150,]  2.674405e+00  6.525884e-01  5.475495e-01  3.712414e-02  7.278302e-01  2.354851e-01
  [151,]  3.556008e+00 -2.199490e-01  4.501220e-01  4.201832e-01  7.066383e-01 -9.600422e-03
  [152,]  7.393203e-01  2.392853e+00  1.287022e+00 -3.290517e-01  4.924273e-01  1.329802e-02
  [153,]  1.280523e+00  2.026696e+00  8.107106e-01 -2.868386e-01  6.492289e-01  3.371576e-01
  [154,]  2.170348e+00  1.131354e+00  6.813562e-01 -6.638855e-02  6.787592e-01  2.330541e-01
  [155,]  1.849284e+00  9.991822e-01  5.051238e-01 -5.257306e-02  4.897946e-01  2.653435e-01
  [156,]  1.870168e+00  1.316393e+00  6.006543e-01 -4.324355e-02  5.540260e-01  2.129298e-01
  [157,]  1.371166e+00  1.486129e+00  1.384976e+00  2.654090e-01  3.073133e-01 -4.855113e-01
  [158,]  7.454876e-01  2.311675e+00  9.709886e-01 -3.586801e-01  4.676539e-01  1.803653e-01
  [159,]  3.140966e+00 -1.743634e-01  3.273602e-01  3.639460e-01  5.143199e-01 -9.705433e-02
  [160,]  2.111663e+00  1.092447e+00  5.673255e-01  4.419451e-02  5.741471e-01  1.651516e-01
  [161,]  2.319455e+00  8.086177e-01  7.777198e-01  3.865898e-01  5.497539e-01 -1.071935e-01
  [162,]  1.545696e+00  1.581049e+00  7.992864e-01 -1.243311e-01  5.378649e-01  1.187910e-01
  [163,]  3.098880e-01  2.733051e+00  1.364830e+00 -3.830859e-01  4.013289e-01 -2.839183e-02
  [164,]  1.053480e+00  2.035085e+00  1.117748e+00 -2.013981e-01  4.824652e-01 -2.245388e-02
  [165,]  3.807886e+00 -5.048465e-01  5.647328e-01  7.574360e-01  5.701567e-01 -3.459630e-01
  [166,]  2.855763e+00  5.998251e-01  4.149993e-01  3.069515e-01  6.704102e-01  1.883833e-01
 [ reached getOption("max.print") -- omitted 35229 rows ]
get_eig(res)

Screeplot

get_eig(res) %>%
  ggplot(aes(x = 1:6, y = cumulative.variance.percent)) +
  geom_line() +
  geom_point() +
  geom_hline(yintercept = 80) +
  xlab("Principal Component") +
  ylab("Proportion of Variance Explained") +
  ggtitle("Scree Plot of Principal Component for Batting Statistics")

2 Principal Components: PC1 and PC2

fviz_screeplot(res, main = "Scree Plot")

Can Identify an elbow in 3.

Biplot

res %>%
  fviz_pca_var(axes = c(1,2),
               col.var = "contrib",
               gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
               repel = TRUE
               )

Cluster Analysis (Lesson 5)

#NOT COMPLETE!!!!! This was just a test, bstats is way too big.
bstats_best <- bstats %>%
  filter(PA >= 600)

eu_dist <- get_dist(careerBatting_num1, method = 'euclidean')
hc_complete <- hclust(eu_dist, method = 'complete')

plot(hc_complete)

Silhouette

res_test <- careerBatting_num1 %>% kmeans(7)
  str(res_test)
List of 9
 $ cluster     : int [1:313] 6 1 4 3 3 3 2 2 4 6 ...
 $ centers     : num [1:7, 1:6] 0.274 0.282 0.28 0.279 0.295 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:7] "1" "2" "3" "4" ...
  .. ..$ : chr [1:6] "BA" "PA" "SlugPct" "OBP" ...
 $ totss       : num 651407
 $ withinss    : num [1:7] 15184 28979 12723 15413 13086 ...
 $ tot.withinss: num 110607
 $ betweenss   : num 540799
 $ size        : int [1:7] 50 102 44 36 33 21 27
 $ iter        : int 3
 $ ifault      : int 0
 - attr(*, "class")= chr "kmeans"
distance <- get_dist(careerBatting_num1, method = "euclidean")
sil <- silhouette(x = res_test$cluster, dist = distance)
summary(sil)
Silhouette of 313 units in 7 clusters from silhouette.default(x = res_test$cluster, dist = distance) :
 Cluster sizes and average silhouette widths:
       50       102        44        36        33        21        27 
0.3520849 0.4329740 0.3000351 0.2682495 0.3614668 0.4120772 0.3306556 
Individual silhouette widths:
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-0.06379  0.21423  0.38618  0.36465  0.51790  0.64837 
sil %>% head()
     cluster neighbor  sil_width
[1,]       6        5 0.35570879
[2,]       1        7 0.35948971
[3,]       4        6 0.21976316
[4,]       3        1 0.55018020
[5,]       3        4 0.08291147
[6,]       3        1 0.10924787
fviz_silhouette(sil)

fviz_nbclust(careerBatting_num1, hcut, hc_method = "complete", hc_metric = "euclidean", method = "wss")

##This is to test other values of K for the silhouette method.
res_test1 <- careerBatting_num1 %>% kmeans(10 )
  str(res_test1)
List of 9
 $ cluster     : int [1:313] 10 4 6 3 3 3 8 2 6 10 ...
 $ centers     : num [1:10, 1:6] 0.287 0.278 0.285 0.273 0.269 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:10] "1" "2" "3" "4" ...
  .. ..$ : chr [1:6] "BA" "PA" "SlugPct" "OBP" ...
 $ totss       : num 651407
 $ withinss    : num [1:10] 5561 3421 7533 8158 17850 ...
 $ tot.withinss: num 82705
 $ betweenss   : num 568701
 $ size        : int [1:10] 47 26 32 35 31 29 28 40 25 20
 $ iter        : int 4
 $ ifault      : int 0
 - attr(*, "class")= chr "kmeans"
distance <- get_dist(careerBatting_num1, method="euclidean")
sil <- silhouette(x = res_test1$cluster, dist = distance)
summary(sil)
Silhouette of 313 units in 10 clusters from silhouette.default(x = res_test1$cluster, dist = distance) :
 Cluster sizes and average silhouette widths:
       47        26        32        35        31        29        28        40        25        20 
0.4123528 0.2235291 0.3095730 0.3662893 0.2348064 0.2444163 0.3084443 0.2360885 0.4169711 0.3504059 
Individual silhouette widths:
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-0.07461  0.18155  0.32364  0.31245  0.44921  0.62394 
sil %>% head()
     cluster neighbor  sil_width
[1,]      10        6 0.25143884
[2,]       4        8 0.56107068
[3,]       6       10 0.21097598
[4,]       3        2 0.37769870
[5,]       3        6 0.09750601
[6,]       3        4 0.41195414
fviz_silhouette(sil)

Diana

Linear Regression (Lesson 6)

Linear Regression comparing team payroll and win rate.

teams = as.data.table(Teams)
teams = teams[, .(yearID,
                  lgID = as.character(lgID),
                  teamID = as.character(teamID),
                  franchID = as.character(franchID),
                  Rank, G, W, L, R, ERA, SO,
                  WinPercent = W/(W+L))]

salaries = as.data.table(Salaries)
salaries = salaries[, c("lgID", "teamID", "salary1M") :=
                      list(as.character(lgID), as.character(teamID), salary / 1e6L)]
payroll = salaries[, .(payroll = sum(salary1M)), by=.(teamID, yearID)]
teamPayroll = merge(teams, payroll, by = c("teamID", "yearID"))
ggplot(data = teamPayroll, aes(x = payroll, y = WinPercent)) + geom_point()  + labs(x = "Payroll (in millions)", y = "Win Percentage") +
  geom_smooth(method = lm, se = FALSE)

mod_lm <- lm(data = teamPayroll, WinPercent~payroll)
mod_lm

Call:
lm(formula = WinPercent ~ payroll, data = teamPayroll)

Coefficients:
(Intercept)      payroll  
  0.4796007    0.0003396  
summary(mod_lm)

Call:
lm(formula = WinPercent ~ payroll, data = teamPayroll)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.230866 -0.048237 -0.000954  0.049584  0.211074 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 0.4796007  0.0037895 126.561  < 2e-16 ***
payroll     0.0003396  0.0000512   6.633 5.61e-11 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.06714 on 916 degrees of freedom
Multiple R-squared:  0.04583,   Adjusted R-squared:  0.04479 
F-statistic:    44 on 1 and 916 DF,  p-value: 5.611e-11
payroll_pred <- teamPayroll %>%
  add_predictions(mod_lm)

payroll_pred %>%
  filter(yearID >= 2010) %>%
  arrange(desc(pred)) %>%
  head(25)
payroll_pred %>%
  filter(yearID >= 2010) %>%
  arrange(desc(WinPercent)) %>%
  head(25)

Only five teams are in the top 25 of both payroll and win percentage in the 2010s. These teams are the 2011 Phillies, 2011 Yankees, 2010 Yankees, 2012 Yankees, and 2016 Rangers. This shows that spending the most money doesn’t automatically mean you are getting the best product on the field. ## Simple Linear Regression

Multiple Linear Regression

bstats_salary <- bstats_salary %>%
  filter(PA >= 100) %>%
  filter(salary > 500000)
lm_mod <- lm(salary ~ H, HR, data = bstats_salary)
summary(lm_mod)

Call:
lm(formula = salary ~ H, data = bstats_salary, subset = HR)

Residuals:
    Min      1Q  Median      3Q     Max 
-358510 -224975  -68095   79439 1124078 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 814969.19    8653.86  94.174   <2e-16 ***
H              573.87      64.96   8.834   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 314600 on 6957 degrees of freedom
Multiple R-squared:  0.01109,   Adjusted R-squared:  0.01095 
F-statistic: 78.04 on 1 and 6957 DF,  p-value: < 2.2e-16
lm_mod_prd <- bstats_salary %>% add_predictions(lm_mod)
lm_mod_prd
full_model <- lm(salary ~., data = bstats_sure)
summary(full_model)

Call:
lm(formula = salary ~ ., data = bstats_sure)

Residuals:
     Min       1Q   Median       3Q      Max 
-6914779 -1878645 -1019496   403743 29613794 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -1794883     277282  -6.473 9.93e-11 ***
OPS           9325457     399105  23.366  < 2e-16 ***
BABIP       -10628363    1053976 -10.084  < 2e-16 ***
K_Percent    -3344230     512360  -6.527 6.95e-11 ***
BB_Percent    7390060     977602   7.559 4.31e-14 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 3549000 on 13381 degrees of freedom
Multiple R-squared:  0.08167,   Adjusted R-squared:  0.0814 
F-statistic: 297.5 on 4 and 13381 DF,  p-value: < 2.2e-16
full_model_pred <- bstats_sure %>% add_predictions(full_model)
full_model_pred
adv_stat_mod <- lm(salary ~ OPS, data = bstats_salary)
summary(adv_stat_mod)

Call:
lm(formula = salary ~ OPS, data = bstats_salary)

Residuals:
     Min       1Q   Median       3Q      Max 
-7024728 -2574846 -1268208  1064627 28064361 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -4206268     319409  -13.17   <2e-16 ***
OPS         10793278     419340   25.74   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 4184000 on 7207 degrees of freedom
Multiple R-squared:  0.08418,   Adjusted R-squared:  0.08406 
F-statistic: 662.5 on 1 and 7207 DF,  p-value: < 2.2e-16

Resampling Methods

bstats_salary_21century <- bstats_salary %>%
  filter(yearID >= 2002)
bstats_salary_21century %>% head(10)
# setting seed to generate a reproducible random sampling
set.seed(123)
 
# defining training control as cross-validation and value of K equal to 10
train_control <- trainControl(method = "cv",
                              number = 10)

# training the model
model <- train(salary ~ OBP, data = bstats_salary_21century,
               method = "lm",
               trControl = train_control)

print(model)
Linear Regression 

4127 samples
   1 predictor

No pre-processing
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 3712, 3714, 3713, 3715, 3715, 3715, ... 
Resampling results:

  RMSE     Rsquared   MAE    
  4903905  0.1069253  3691779

Tuning parameter 'intercept' was held constant at a value of TRUE

Feature Selection

bstats_salary_numvars <- bstats_salary_21century %>% 
  select(c(6:32))
regfit.full = regsubsets(salary ~., data = bstats_salary_numvars,  nvmax = 13, method="exhaustive")
3  linear dependencies found
Reordering variables and trying again:
summary(regfit.full)
Subset selection object
Call: regsubsets.formula(salary ~ ., data = bstats_salary_numvars, 
    nvmax = 13, method = "exhaustive")
26 Variables  (and intercept)
           Forced in Forced out
G              FALSE      FALSE
AB             FALSE      FALSE
R              FALSE      FALSE
H              FALSE      FALSE
X2B            FALSE      FALSE
X3B            FALSE      FALSE
HR             FALSE      FALSE
RBI            FALSE      FALSE
SB             FALSE      FALSE
CS             FALSE      FALSE
BB             FALSE      FALSE
SO             FALSE      FALSE
IBB            FALSE      FALSE
HBP            FALSE      FALSE
SH             FALSE      FALSE
SF             FALSE      FALSE
GIDP           FALSE      FALSE
BA             FALSE      FALSE
SlugPct        FALSE      FALSE
OBP            FALSE      FALSE
BABIP          FALSE      FALSE
K_Percent      FALSE      FALSE
BB_Percent     FALSE      FALSE
PA             FALSE      FALSE
TB             FALSE      FALSE
OPS            FALSE      FALSE
1 subsets of each size up to 14
Selection Algorithm: exhaustive
          G   AB  R   H   X2B X3B HR  RBI SB  CS  BB  SO  IBB HBP SH  SF  GIDP BA  PA  TB  SlugPct OBP
1  ( 1 )  " " " " " " " " " " " " " " "*" " " " " " " " " " " " " " " " " " "  " " " " " " " "     " "
2  ( 1 )  " " " " " " " " " " " " " " "*" " " " " " " " " " " " " " " " " " "  " " " " " " " "     " "
3  ( 1 )  "*" " " " " " " " " " " " " " " " " " " " " " " " " " " "*" " " " "  " " "*" " " " "     " "
4  ( 1 )  "*" " " " " " " " " " " " " " " " " " " " " " " "*" " " "*" " " " "  " " "*" " " " "     " "
5  ( 1 )  "*" " " " " " " " " "*" " " " " " " " " " " " " "*" " " "*" " " " "  " " "*" " " " "     " "
6  ( 1 )  "*" " " " " " " "*" "*" " " " " " " " " " " " " "*" " " "*" " " " "  " " "*" " " " "     " "
7  ( 1 )  "*" " " " " " " "*" "*" " " " " " " " " " " " " "*" " " "*" " " " "  " " "*" " " " "     " "
8  ( 1 )  "*" " " " " " " "*" "*" " " " " " " " " " " " " "*" " " "*" " " "*"  " " "*" " " " "     " "
9  ( 1 )  "*" " " " " " " "*" "*" " " " " " " " " " " " " "*" " " "*" " " "*"  " " "*" " " "*"     " "
10  ( 1 ) "*" " " " " " " "*" "*" " " " " " " " " " " " " "*" " " "*" " " "*"  " " "*" " " "*"     " "
11  ( 1 ) "*" " " " " " " "*" "*" " " " " "*" "*" " " " " "*" " " "*" " " "*"  " " "*" " " "*"     " "
12  ( 1 ) "*" " " " " " " "*" "*" " " " " "*" "*" " " " " "*" " " "*" " " "*"  " " "*" " " "*"     " "
13  ( 1 ) "*" " " " " " " "*" "*" " " "*" "*" "*" " " "*" "*" " " "*" " " "*"  " " "*" " " "*"     " "
14  ( 1 ) "*" "*" " " " " "*" "*" "*" "*" "*" "*" "*" " " "*" " " "*" " " "*"  " " " " " " " "     " "
          OPS BABIP K_Percent BB_Percent
1  ( 1 )  " " " "   " "       " "       
2  ( 1 )  " " " "   " "       "*"       
3  ( 1 )  " " " "   " "       " "       
4  ( 1 )  " " " "   " "       " "       
5  ( 1 )  " " " "   " "       " "       
6  ( 1 )  " " " "   " "       " "       
7  ( 1 )  "*" " "   " "       " "       
8  ( 1 )  "*" " "   " "       " "       
9  ( 1 )  " " " "   " "       "*"       
10  ( 1 ) " " " "   "*"       "*"       
11  ( 1 ) " " " "   " "       "*"       
12  ( 1 ) " " " "   "*"       "*"       
13  ( 1 ) " " " "   " "       "*"       
14  ( 1 ) "*" "*"   " "       " "       
summary(regfit.full)$rsq
 [1] 0.1735921 0.1997069 0.2379362 0.2627538 0.2763364 0.2820003 0.2891514 0.2937692 0.2962186 0.2979275
[11] 0.2997391 0.3013596 0.3020423 0.3031258
plot(summary(regfit.full)$rsq)

reg.summary <- summary(regfit.full) #get the summary

par(mfrow=c(2,2))
#rss plot -  NOT USEFUL
plot(reg.summary$rss ,xlab="Number of Variables ",ylab="RSS",type="l")

#adjr2 plot
plot(reg.summary$adjr2 ,xlab="Number of Variables ", ylab="Adjusted RSq",type="l")

max_adjr2 <- which.max(reg.summary$adjr2)
points(max_adjr2,reg.summary$adjr2[max_adjr2], col="red",cex=2,pch=20)

# AIC criterion (Cp) to minimize
plot(reg.summary$cp ,xlab="Number of Variables ",ylab="Cp", type='l')

min_cp <- which.min(reg.summary$cp )
points(min_cp, reg.summary$cp[min_cp],col="red",cex=2,pch=20)

# BIC criterion to minimize
plot(reg.summary$bic ,xlab="Number of Variables ",ylab="BIC",type='l')

min_bic <- which.min(reg.summary$bic)
points(min_bic,reg.summary$bic[min_bic],col="red",cex=2,pch=20)

regfit.fwd = regsubsets(salary ~. , data=bstats_salary_numvars, nvmax=13, method ="forward")
3  linear dependencies found
Reordering variables and trying again:
summary(regfit.fwd)
Subset selection object
Call: regsubsets.formula(salary ~ ., data = bstats_salary_numvars, 
    nvmax = 13, method = "forward")
26 Variables  (and intercept)
           Forced in Forced out
G              FALSE      FALSE
AB             FALSE      FALSE
R              FALSE      FALSE
H              FALSE      FALSE
X2B            FALSE      FALSE
X3B            FALSE      FALSE
HR             FALSE      FALSE
RBI            FALSE      FALSE
SB             FALSE      FALSE
CS             FALSE      FALSE
BB             FALSE      FALSE
SO             FALSE      FALSE
IBB            FALSE      FALSE
HBP            FALSE      FALSE
SH             FALSE      FALSE
SF             FALSE      FALSE
GIDP           FALSE      FALSE
BA             FALSE      FALSE
SlugPct        FALSE      FALSE
OBP            FALSE      FALSE
BABIP          FALSE      FALSE
K_Percent      FALSE      FALSE
BB_Percent     FALSE      FALSE
PA             FALSE      FALSE
TB             FALSE      FALSE
OPS            FALSE      FALSE
1 subsets of each size up to 14
Selection Algorithm: forward
          G   AB  R   H   X2B X3B HR  RBI SB  CS  BB  SO  IBB HBP SH  SF  GIDP BA  PA  TB  SlugPct OBP
1  ( 1 )  " " " " " " " " " " " " " " "*" " " " " " " " " " " " " " " " " " "  " " " " " " " "     " "
2  ( 1 )  " " " " " " " " " " " " " " "*" " " " " " " " " " " " " " " " " " "  " " " " " " " "     " "
3  ( 1 )  "*" " " " " " " " " " " " " "*" " " " " " " " " " " " " " " " " " "  " " " " " " " "     " "
4  ( 1 )  "*" "*" " " " " " " " " " " "*" " " " " " " " " " " " " " " " " " "  " " " " " " " "     " "
5  ( 1 )  "*" "*" " " " " " " "*" " " "*" " " " " " " " " " " " " " " " " " "  " " " " " " " "     " "
6  ( 1 )  "*" "*" " " " " " " "*" " " "*" " " " " " " " " " " " " "*" " " " "  " " " " " " " "     " "
7  ( 1 )  "*" "*" " " " " "*" "*" " " "*" " " " " " " " " " " " " "*" " " " "  " " " " " " " "     " "
8  ( 1 )  "*" "*" " " " " "*" "*" " " "*" " " " " " " " " " " " " "*" " " "*"  " " " " " " " "     " "
9  ( 1 )  "*" "*" " " " " "*" "*" " " "*" " " " " " " " " "*" " " "*" " " "*"  " " " " " " " "     " "
10  ( 1 ) "*" "*" " " " " "*" "*" " " "*" " " " " " " " " "*" " " "*" " " "*"  " " " " " " " "     " "
11  ( 1 ) "*" "*" "*" " " "*" "*" " " "*" " " " " " " " " "*" " " "*" " " "*"  " " " " " " " "     " "
12  ( 1 ) "*" "*" "*" " " "*" "*" " " "*" " " "*" " " " " "*" " " "*" " " "*"  " " " " " " " "     " "
13  ( 1 ) "*" "*" "*" " " "*" "*" " " "*" "*" "*" " " " " "*" " " "*" " " "*"  " " " " " " " "     " "
14  ( 1 ) "*" "*" "*" " " "*" "*" " " "*" "*" "*" "*" " " "*" " " "*" " " "*"  " " " " " " " "     " "
          OPS BABIP K_Percent BB_Percent
1  ( 1 )  " " " "   " "       " "       
2  ( 1 )  " " " "   " "       "*"       
3  ( 1 )  " " " "   " "       "*"       
4  ( 1 )  " " " "   " "       "*"       
5  ( 1 )  " " " "   " "       "*"       
6  ( 1 )  " " " "   " "       "*"       
7  ( 1 )  " " " "   " "       "*"       
8  ( 1 )  " " " "   " "       "*"       
9  ( 1 )  " " " "   " "       "*"       
10  ( 1 ) " " " "   "*"       "*"       
11  ( 1 ) " " " "   "*"       "*"       
12  ( 1 ) " " " "   "*"       "*"       
13  ( 1 ) " " " "   "*"       "*"       
14  ( 1 ) " " " "   "*"       "*"       
reg.summary <- summary(regfit.fwd) #get the summary

par(mfrow=c(2,2))
#rss plot -  NOT USEFUL
plot(reg.summary$rss ,xlab="Number of Variables ",ylab="RSS",type="l")

#adjr2 plot
plot(reg.summary$adjr2 ,xlab="Number of Variables ", ylab="Adjusted RSq",type="l")

max_adjr2 <- which.max(reg.summary$adjr2)
points(max_adjr2,reg.summary$adjr2[max_adjr2], col="red",cex=2,pch=20)

# AIC criterion (Cp) to minimize
plot(reg.summary$cp ,xlab="Number of Variables ",ylab="Cp", type='l')

min_cp <- which.min(reg.summary$cp )
points(min_cp, reg.summary$cp[min_cp],col="red",cex=2,pch=20)

# BIC criterion to minimize
plot(reg.summary$bic ,xlab="Number of Variables ",ylab="BIC",type='l')

min_bic <- which.min(reg.summary$bic)
points(min_bic,reg.summary$bic[min_bic],col="red",cex=2,pch=20)

#Backwards stepwise
regfit.bwd = regsubsets(salary ~. , data=bstats_salary_numvars,nvmax=13, method ="backward")
3  linear dependencies found
Reordering variables and trying again:
summary(regfit.bwd)
Subset selection object
Call: regsubsets.formula(salary ~ ., data = bstats_salary_numvars, 
    nvmax = 13, method = "backward")
26 Variables  (and intercept)
           Forced in Forced out
G              FALSE      FALSE
AB             FALSE      FALSE
R              FALSE      FALSE
H              FALSE      FALSE
X2B            FALSE      FALSE
X3B            FALSE      FALSE
HR             FALSE      FALSE
RBI            FALSE      FALSE
SB             FALSE      FALSE
CS             FALSE      FALSE
BB             FALSE      FALSE
SO             FALSE      FALSE
IBB            FALSE      FALSE
HBP            FALSE      FALSE
SH             FALSE      FALSE
SF             FALSE      FALSE
GIDP           FALSE      FALSE
BA             FALSE      FALSE
SlugPct        FALSE      FALSE
OBP            FALSE      FALSE
BABIP          FALSE      FALSE
K_Percent      FALSE      FALSE
BB_Percent     FALSE      FALSE
PA             FALSE      FALSE
TB             FALSE      FALSE
OPS            FALSE      FALSE
1 subsets of each size up to 14
Selection Algorithm: backward
          G   AB  R   H   X2B X3B HR  RBI SB  CS  BB  SO  IBB HBP SH  SF  GIDP BA  PA  TB  SlugPct OBP
1  ( 1 )  " " " " " " " " " " " " " " " " " " " " "*" " " " " " " " " " " " "  " " " " " " " "     " "
2  ( 1 )  " " "*" " " " " " " " " " " " " " " " " "*" " " " " " " " " " " " "  " " " " " " " "     " "
3  ( 1 )  "*" "*" " " " " " " " " " " " " " " " " "*" " " " " " " " " " " " "  " " " " " " " "     " "
4  ( 1 )  "*" "*" " " " " " " " " " " " " " " " " "*" " " " " " " "*" " " " "  " " " " " " " "     " "
5  ( 1 )  "*" "*" " " " " " " "*" " " " " " " " " "*" " " " " " " "*" " " " "  " " " " " " " "     " "
6  ( 1 )  "*" "*" " " " " " " "*" " " " " " " " " "*" " " "*" " " "*" " " " "  " " " " " " " "     " "
7  ( 1 )  "*" "*" " " " " " " "*" " " " " " " " " "*" " " "*" " " "*" " " "*"  " " " " " " " "     " "
8  ( 1 )  "*" "*" " " " " "*" "*" " " " " " " " " "*" " " "*" " " "*" " " "*"  " " " " " " " "     " "
9  ( 1 )  "*" "*" " " " " "*" "*" " " " " " " " " "*" " " "*" " " "*" " " "*"  " " " " " " "*"     " "
10  ( 1 ) "*" "*" " " " " "*" "*" " " " " " " "*" "*" " " "*" " " "*" " " "*"  " " " " " " "*"     " "
11  ( 1 ) "*" "*" " " " " "*" "*" " " " " "*" "*" "*" " " "*" " " "*" " " "*"  " " " " " " "*"     " "
12  ( 1 ) "*" "*" " " " " "*" "*" " " "*" "*" "*" "*" " " "*" " " "*" " " "*"  " " " " " " "*"     " "
13  ( 1 ) "*" "*" " " " " "*" "*" "*" "*" "*" "*" "*" " " "*" " " "*" " " "*"  " " " " " " "*"     " "
14  ( 1 ) "*" "*" " " " " "*" "*" "*" "*" "*" "*" "*" " " "*" " " "*" " " "*"  " " " " " " "*"     " "
          OPS BABIP K_Percent BB_Percent
1  ( 1 )  " " " "   " "       " "       
2  ( 1 )  " " " "   " "       " "       
3  ( 1 )  " " " "   " "       " "       
4  ( 1 )  " " " "   " "       " "       
5  ( 1 )  " " " "   " "       " "       
6  ( 1 )  " " " "   " "       " "       
7  ( 1 )  " " " "   " "       " "       
8  ( 1 )  " " " "   " "       " "       
9  ( 1 )  " " " "   " "       " "       
10  ( 1 ) " " " "   " "       " "       
11  ( 1 ) " " " "   " "       " "       
12  ( 1 ) " " " "   " "       " "       
13  ( 1 ) " " " "   " "       " "       
14  ( 1 ) " " "*"   " "       " "       
reg.summary <- summary(regfit.bwd) #get the summary

par(mfrow=c(2,2))
#rss plot -  NOT USEFUL
plot(reg.summary$rss ,xlab="Number of Variables ",ylab="RSS",type="l")

#adjr2 plot
plot(reg.summary$adjr2 ,xlab="Number of Variables ", ylab="Adjusted RSq",type="l")

max_adjr2 <- which.max(reg.summary$adjr2)
points(max_adjr2, reg.summary$adjr2[max_adjr2], col="red", cex=2, pch=20)

# AIC criterion (Cp) to minimize
plot(reg.summary$cp ,xlab="Number of Variables ",ylab="Cp", type='l')

min_cp <- which.min(reg.summary$cp )
points(min_cp, reg.summary$cp[min_cp], col="red", cex=2, pch=20)

# BIC criterion to minimize
plot(reg.summary$bic, xlab="Number of Variables ", ylab="BIC", type='l')

min_bic <- which.min(reg.summary$bic)
points(min_bic, reg.summary$bic[min_bic], col="red", cex=2, pch=20)

#ridge regression 

# getting the predictors
x_var <- bstats_salary_numvars %>% select(-salary) %>% as.matrix()
# getting the independent variable
y_var <- bstats_salary_numvars[,"salary"]
ridge <- glmnet(x_var, y_var, alpha=0)
summary(ridge)
          Length Class     Mode   
a0         100   -none-    numeric
beta      2600   dgCMatrix S4     
df         100   -none-    numeric
dim          2   -none-    numeric
lambda     100   -none-    numeric
dev.ratio  100   -none-    numeric
nulldev      1   -none-    numeric
npasses      1   -none-    numeric
jerr         1   -none-    numeric
offset       1   -none-    logical
call         4   -none-    call   
nobs         1   -none-    numeric
cv_ridge <- cv.glmnet(x_var, y_var, alpha = 0)
cv_ridge

Call:  cv.glmnet(x = x_var, y = y_var, alpha = 0) 

Measure: Mean-Squared Error 

     Lambda Index   Measure        SE Nonzero
min  216221   100 1.919e+13 5.986e+11      26
1se 1051399    83 1.976e+13 6.505e+11      26
plot(cv_ridge)

cv_ridge$lambda.min
[1] 216221.5
cv_ridge$lambda.1se
[1] 1051399
lbs_fun <- function(fit, offset_x=1, ...) {
  L <- length(fit$lambda)
  x <- log(fit$lambda[L]) + offset_x
  y <- fit$beta[ ,L]
  labs <- names(y)
  text(x, y, labels=labs, ...)
}

plot(ridge, xvar = "lambda", label=T)
lbs_fun(ridge) # add namnes

abline(v = log(cv_ridge$lambda.min), col = "red", lty=2) #lambda.min
abline(v = log(cv_ridge$lambda.1se), col="blue", lty=2)  #lambda.1se

min_ridge <- glmnet(x_var, y_var, alpha=0, lambda = cv_ridge$lambda.min)
coef(min_ridge)
27 x 1 sparse Matrix of class "dgCMatrix"
                      s0
(Intercept)  3250721.295
G             -58444.324
AB              6525.990
R              18080.066
H              13278.220
X2B           -69523.005
X3B          -242522.624
HR            -18087.206
RBI            13495.290
SB             36983.072
CS           -135994.183
BB             21055.638
SO              5972.853
IBB            85143.276
HBP           -15492.692
SH           -240184.732
SF             25292.413
GIDP           96266.785
BA           4307633.504
PA              5100.777
TB              1191.386
SlugPct      1694540.681
OBP          2293477.178
OPS          1307566.343
BABIP       -9978065.665
K_Percent   -2726576.998
BB_Percent   7042279.206
# Make predictions on the test data
predictions <- min_ridge %>% predict(x_var) %>% as.vector()

# Model performance metrics
data.frame(
  RMSE = RMSE(predictions, y_var),
  Rsquare = R2(predictions, y_var)
)
# Lasso 

# getting the predictors
x_var <- bstats_salary_numvars %>% select(-salary) %>% as.matrix()
# getting the independent variable
y_var <- bstats_salary_numvars[,"salary"]
lasso <- glmnet(x_var, y_var, alpha=1)
summary(lasso)
          Length Class     Mode   
a0          93   -none-    numeric
beta      2418   dgCMatrix S4     
df          93   -none-    numeric
dim          2   -none-    numeric
lambda      93   -none-    numeric
dev.ratio   93   -none-    numeric
nulldev      1   -none-    numeric
npasses      1   -none-    numeric
jerr         1   -none-    numeric
offset       1   -none-    logical
call         4   -none-    call   
nobs         1   -none-    numeric
cv_lasso <- cv.glmnet(x_var, y_var, alpha = 1)
cv_lasso

Call:  cv.glmnet(x = x_var, y = y_var, alpha = 1) 

Measure: Mean-Squared Error 

    Lambda Index   Measure        SE Nonzero
min   4245    68 1.899e+13 6.269e+11      20
1se  91445    35 1.956e+13 6.738e+11      17
plot(cv_lasso)

lbs_fun <- function(fit, offset_x=1, ...) {
  L <- length(fit$lambda)
  x <- log(fit$lambda[L])+ offset_x
  y <- fit$beta[, L]
  labs <- names(y)
  text(x, y, labels=labs, ...)
}
plot(lasso, xvar = "lambda", label=T)
lbs_fun(lasso)

abline(v=log(cv_lasso$lambda.min), col = "red", lty=2)
abline(v=log(cv_lasso$lambda.1se), col="blue", lty=2)

min_lasso <- glmnet(x_var, y_var, alpha=1, lambda = cv_lasso$lambda.min)
coef(min_lasso)
27 x 1 sparse Matrix of class "dgCMatrix"
                      s0
(Intercept)  2264368.204
G             -87629.654
AB             24232.612
R              18771.351
H                  .    
X2B          -107459.898
X3B          -280673.479
HR            -57743.509
RBI            20863.414
SB             33703.191
CS           -150540.856
BB             28671.666
SO                 .    
IBB            99202.779
HBP            -8281.984
SH           -238325.444
SF             19061.264
GIDP           87321.414
BA                 .    
PA              1860.331
TB             -1044.547
SlugPct      4865101.359
OBP                .    
OPS          2765165.894
BABIP       -6624249.069
K_Percent   -1340986.372
BB_Percent   7490373.844
se_lasso <- glmnet(x_var, y_var, alpha=1, lambda = cv_lasso$lambda.1se)
coef(se_lasso)
27 x 1 sparse Matrix of class "dgCMatrix"
                      s0
(Intercept)  3242070.192
G             -50386.031
AB              9548.952
R              12602.471
H                  .    
X2B            -8338.434
X3B          -168194.534
HR              9621.891
RBI            15185.753
SB              4485.584
CS            -39631.941
BB             29452.924
SO                 .    
IBB            83501.590
HBP                .    
SH           -218512.738
SF                 .    
GIDP           86598.503
BA                 .    
PA              1625.492
TB                 .    
SlugPct            .    
OBP                .    
OPS           477409.129
BABIP              .    
K_Percent   -2789571.448
BB_Percent   5662137.365
# Make predictions on the test data
predictions <- min_lasso %>% predict(x_var) %>% as.vector()
# Model performance metrics
data.frame(
  RMSE = RMSE(predictions, y_var),
  Rsquare = R2(predictions, y_var)
)

Salary Data

franchise <- c(`ANA` = "LAA", `ARI` = "ARI", `ATL` = "ATL", 
               `BAL` = "BAL", `BOS` = "BOS", `CAL` = "LAA",
               `CHA` = "CHA", `CHN` = "CHN", `CIN` = "CIN", 
               `CLE` = "CLE", `COL` = "COL", `DET` = "DET", 
               `FLO` = "MIA", `HOU` = "HOU", `KCA` = "KCA", 
               `LAA` = "LAA", `LAN` = "LAN", `MIA` = "MIA", 
               `MIL` = "MIL", `MIN` = "MIN", `ML4` = "MIL", 
               `MON` = "WAS", `NYA` = "NYA", `NYM` = "NYN", 
               `NYN` = "NYN", `OAK` = "OAK", `PHI` = "PHI", 
               `PIT` = "PIT", `SDN` = "SDN", `SEA` = "SEA",
               `SFG` = "SFN", `SFN` = "SFN", `SLN` = "SLN", 
               `TBA` = "TBA", `TEX` = "TEX", `TOR` = "TOR",
               `WAS` = "WAS")
Salaries$franchise <- unname(franchise[Salaries$teamID])
avg_team_salaries <- Salaries %>%
    group_by(yearID, franchise, lgID) %>%
    summarise(salary = mean(salary)/1e6) %>%
    filter(!(franchise == "CLE" & lgID == "NL"))
`summarise()` has grouped output by 'yearID', 'franchise'. You can override using the `.groups` argument.
ggplot(avg_team_salaries, 
       aes(x = yearID, y = salary, group = factor(franchise))) +
       geom_path() +
       labs(x = "Year", y = "Average team salary (millions USD)")

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCmVkaXRvcl9vcHRpb25zOiAKICBjaHVua19vdXRwdXRfdHlwZTogaW5saW5lCi0tLQoKYGBge3J9CnJtKGxpc3QgPSBscygpKQoKbGlicmFyeShMYWhtYW4pCmxpYnJhcnkobW9zYWljKQpsaWJyYXJ5KHRpZHlyKQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShkcGx5cikKbGlicmFyeShtcGxvdCkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGNsdXN0ZXIpCmxpYnJhcnkoZmFjdG9leHRyYSkKbGlicmFyeShjb3JycGxvdCkKbGlicmFyeShkYXRhLnRhYmxlKQpsaWJyYXJ5KG1vZCkKbGlicmFyeShtb2RlbHIpCmxpYnJhcnkobGVhcHMpCmxpYnJhcnkoY2FyZXQpCmxpYnJhcnkoSVNMUjIpCmxpYnJhcnkoZ2xtbmV0KQpgYGAKCmBgYHtyfQojTG9hZCBpbiBQZW9wbGUsIEJhdHRpbmcsIGFuZCBQaXRjaGluZyBEYXRhZnJhbWVzCmRhdGEoIlBlb3BsZSIpIApkYXRhKCJCYXR0aW5nIikKZGF0YSgiUGl0Y2hpbmciKQpgYGAKCmBgYHtyfQojTWVyZ2VzIHBsYXllciBuYW1lIHRvIEJhdHRpbmcgZGF0YS4gCmJzdGF0cyA8LSBiYXR0aW5nU3RhdHMoKQoJc3RyKGJzdGF0cykKCQoKUGVvcGxlJG5hbWUgPC0gcGFzdGUoUGVvcGxlJG5hbWVGaXJzdCwgUGVvcGxlJG5hbWVMYXN0LCBzZXAgPSAiICIpCgpiYXR0aW5nX25hbWUgPC0gbWVyZ2UoQmF0dGluZywKICAgICAgICAgICAgICAgICBQZW9wbGVbLGMoInBsYXllcklEIiwgIm5hbWUiKV0sCiAgICAgICAgICAgICAgICAgYnkgPSAicGxheWVySUQiLCBhbGwueCA9IFRSVUUpCgojTWVyZ2VzIHBsYXllciBuYW1lIHRvIFBpdGNoaW5nIGRhdGEuCgpQZW9wbGUkbmFtZSA8LSBwYXN0ZShQZW9wbGUkbmFtZUZpcnN0LCBQZW9wbGUkbmFtZUxhc3QsIHNlcCA9ICIgIikKCnBpdGNoaW5nX25hbWUgPC0gbWVyZ2UoUGl0Y2hpbmcsCiAgICAgICAgICAgICAgICAgUGVvcGxlWyxjKCJwbGF5ZXJJRCIsICJuYW1lIildLAogICAgICAgICAgICAgICAgIGJ5ID0gInBsYXllcklEIiwgYWxsLnggPSBUUlVFKQpgYGAKCmBgYHtyfQojQ3JlYXRpbmcgYWRkaXRpb25hbCBzdGF0cyBmb3IgYnN0YXRzCmJzdGF0c1tpcy5uYShic3RhdHMpXSA9IDAKI2lzLm5hbihic3RhdHMpCgpic3RhdHMgPC0gYnN0YXRzICU+JQogIG11dGF0ZShLX1BlcmNlbnQgPSBTTyAvIFBBKSAlPiUKICBtdXRhdGUoQkJfUGVyY2VudCA9IChCQiArIElCQikgLyBQQSkgJT4lCiAgbXV0YXRlX2FsbCh+cmVwbGFjZSguLCBpcy5uYW4oLiksIDApKQoKYGBgCgpgYGB7cn0KYnN0YXRzIDwtIGJzdGF0cyAlPiUKICBtdXRhdGVfYXQodmFycyhLX1BlcmNlbnQsIEJCX1BlcmNlbnQpLCBmdW5zKHJvdW5kKC4sIDMpKSkKYGBgCgpgYGB7cn0KYnN0YXRzX3NhbGFyeSA8LSBic3RhdHMgJT4lCiAgICAgICAgICAgICAgZmlsdGVyKHllYXJJRCA+PSAxOTg1KSAlPiUKICAgICAgICAgICAgICBsZWZ0X2pvaW4oc2VsZWN0KFNhbGFyaWVzLCBwbGF5ZXJJRCwgeWVhcklELCB0ZWFtSUQsIHNhbGFyeSksIAogICAgICAgICAgICAgICAgICAgICAgICAgYnk9YygicGxheWVySUQiLCAieWVhcklEIiwgInRlYW1JRCIpKQoKYnN0YXRzX3NhbGFyeVtpcy5uYShic3RhdHNfc2FsYXJ5KV0gPSAwCnN0cihic3RhdHNfc2FsYXJ5KQoKYGBgCgpgYGB7cn0KYnN0YXRzX3N1cmUgPC0gYnN0YXRzX3NhbGFyeSAlPiUKICBmaWx0ZXIoUEEgPiAxNTApICU+JQogIHNlbGVjdChPUFMsIEJBQklQLCBLX1BlcmNlbnQsIEJCX1BlcmNlbnQsIHNhbGFyeSkKYGBgCgojIyBEYXRhIFByZXBhcmF0aW9uIChMZXNzb24gMSAmIDIpCgpgYGB7cn0KI0tlZXAgcGxheWVycyB3aXRoIG92ZXIgMTUwIGF0IGJhdHMuIChXZSBjYW4gY2hhbmdlIHRoaXMgdmFsdWUgaWYgbmVjZXNzYXJ5KS4KI0NyZWF0aW5nIGJhdHRpbmcgYXZlcmFnZSB2YXJpYWJsZS4KCmJhdHRpbmcxIDwtIGJzdGF0cyAlPiUKICBmaWx0ZXIoQUIgPj0gMTUwKQogIApgYGAKCmBgYHtyfQpic3RhdHMgJT4lCiAgZmlsdGVyKHBsYXllcklEID09ICJib2dhZXhhMDEiKQpgYGAKCiMjIEV4cGxvcmF0b3J5IEFuYWx5c2lzIChMZXNzb24gMSAmIDIpCkxlc3NvbnMgMSBhbmQgMiB3aWxsIGp1c3QgYmUgcGFydHMgb2YgdGhlIG92ZXJhbGwgcHJvamVjdC4gU2ltcGxlIHRoaW5ncyBsaWtlIGRhdGEgbWFuaXB1bGF0aW9uLCBhcHBseSBmdW5jdGlvbnMsIGJveHBsb3RzLCBldGMuIFRoaXMgd2lsbCBiZSBkYXRhIHByZXBhcmF0aW9uIGl0ZW1zIGFuZCBleHBsb3JhdG9yeSBhbmFseXNpcy4KCmBgYHtyfQpiIDwtIGdncGxvdChiYXR0aW5nMSwgYWVzKHggPSB0ZWFtSUQsIHkgPSBIUikpICsKICBnZW9tX2JveHBsb3QoY29sID0gImJsYWNrIiwgYWVzKGZpbGwgPSB0ZWFtSUQpKQpiCgpgYGAKCmBgYHtyfQpoaXR0ZXJzMSA8LSBiYXR0aW5nMSAlPiUKICBmaWx0ZXIoeWVhcklEIDwgMTg5NSkgJT4lCiAgc2VsZWN0KFNsdWdQY3QpCgpoaXR0ZXJzMiA8LSBiYXR0aW5nMSAlPiUKICBmaWx0ZXIoeWVhcklEID4gMTg5NCwgeWVhcklEIDwgMTkyMSkgJT4lCiAgc2VsZWN0KFNsdWdQY3QpCgpoaXR0ZXJzMyA8LSBiYXR0aW5nMSAlPiUKICBmaWx0ZXIoeWVhcklEID4gMTkyMCwgeWVhcklEIDwgMTk2OSkgJT4lCiAgc2VsZWN0KFNsdWdQY3QpCgpoaXR0ZXJzNCA8LSBiYXR0aW5nMSAlPiUKICBmaWx0ZXIoeWVhcklEID4gMTk2OSkgJT4lCiAgc2VsZWN0KFNsdWdQY3QpCiNPcmdhbml6aW5nIDQgZGlmZmVyZW50IGRhdGFzZXRzIGxvb2tpbmcgYXQgc2x1Z2dpbmcgcGVyY2VudGFnZSBmb3IgdGhlIGZvbGxvd2luZyBib3hwbG90cy4gQWxsIG9mIHRoZXNlIGFyZSBzb21ld2hhdCBkaWZmZXJlbnQgZXJhcywgd2l0aCB0aGUgbW9zdCBkcmFtYXRpYyBzcGxpdCBiZWluZyBmcm9tIGJlZm9yZSAxOTIwIChwcmUtQmFiZSBSdXRoKSBhbmQgYWZ0ZXIgMTkyMCAoZHVyaW5nIGFuZCBwb3N0LUJhYmUgUnV0aCkKYGBgCgpgYGB7cn0KYm94cGxvdChoaXR0ZXJzMSwKICAgICAgICBtYWluID0gIlNsdWdnaW5nIHBlcmNlbnRhZ2UgZnJvbSBsYXRlIDE4NzEgLSAxODk0IiwKICAgICAgICB5bGFiID0gIlNsdWdnaW5nIHBlcmNlbnRhZ2UiLAogICAgICAgIGNvbCA9ICJibHVlIiwKICAgICAgICBob3Jpem9udGFsID0gVFJVRSkKYGBgCgpgYGB7cn0KYm94cGxvdChoaXR0ZXJzMiwgCiAgICAgICAgbWFpbiA9ICJTbHVnZ2luZyBwZXJjZW50YWdlIGZyb20gMTg5NS0xOTIwIiwKICAgICAgICB5bGFiID0gIlNsdWdnaW5nIHBlcmNlbnRhZ2UiLAogICAgICAgIGNvbCA9ICJ5ZWxsb3ciLAogICAgICAgIGhvcml6b250YWwgPSBUUlVFKQpgYGAKCmBgYHtyfQpib3hwbG90KGhpdHRlcnMzLCAKICAgICAgICBtYWluID0gIlNsdWdnaW5nIHBlcmNlbnRhZ2UgZnJvbSAxOTIxLTE5NjgiLAogICAgICAgIHlsYWIgPSAiU2x1Z2dpbmcgcGVyY2VudGFnZSIsCiAgICAgICAgY29sID0gInJlZCIsCiAgICAgICAgaG9yaXpvbnRhbCA9IFRSVUUpCmBgYAoKYGBge3J9CmJveHBsb3QoaGl0dGVyczQsIAogICAgICAgIG1haW4gPSAiU2x1Z2dpbmcgcGVyY2VudGFnZSBmcm9tIDE5NjkgLSBwcmVzZW50IiwKICAgICAgICB5bGFiID0gIlNsdWdnaW5nIHBlcmNlbnRhZ2UiLAogICAgICAgIGNvbCA9ICJyZWQiLAogICAgICAgIGhvcml6b250YWwgPSBUUlVFKQpgYGAKCgpgYGB7cn0Kc2FwcGx5KGhpdHRlcnMxLCBtZWFuLCBuYS5ybSA9IFQpCnNhcHBseShoaXR0ZXJzMiwgbWVhbiwgbmEucm0gPSBUKQpzYXBwbHkoaGl0dGVyczMsIG1lYW4sIG5hLnJtID0gVCkKc2FwcGx5KGhpdHRlcnM0LCBtZWFuLCBuYS5ybSA9IFQpCiNOb3RpY2UgdGhhdCBnaWdhbnRpYyBpbmNyZWFzZSBiZXR3ZWVuIGhpdHRlcnMyIGFuZCBoaXR0ZXJzMwpgYGAKCmBgYHtyfQpzdW1tYXJ5KGhpdHRlcnMxKQpgYGAKCmBgYHtyfQpzdW1tYXJ5KGhpdHRlcnMyKQpgYGAKCmBgYHtyfQpzdW1tYXJ5KGhpdHRlcnMzKQpgYGAKCmBgYHtyfQpzdW1tYXJ5KGhpdHRlcnM0KQpgYGAKCmBgYHtyfQojS2VlcCBiYXR0aW5nIHN0YXRzIHRoYXQgd2Ugd2FudCBmb3IgcGFpcnMuCmJhdHRpbmdfbnVtIDwtIGJzdGF0cyAlPiUKICBmaWx0ZXIoUEEgPj0gMTUwKSAlPiUKICBzZWxlY3QoIkJBIiwgJ09CUCcsICdTbHVnUGN0JywgIlNPIiwgIkJCIiwgIkhSIikKICAKYGBgCgpgYGB7cn0KcGFpcnMoYmF0dGluZ19udW0pCmBgYAojIyMjIENhcmVlciBCYXR0aW5nIFN0YXRzCmBgYHtyfQpjYXJlZXJCYXR0aW5nIDwtIG5hLm9taXQoYnN0YXRzKQpgYGAKCmBgYHtyfQpjYXJlZXJCYXR0aW5nIDwtIGNhcmVlckJhdHRpbmcgJT4lCiAgc2VsZWN0KHBsYXllcklELCBCQSwgUEEsIFNsdWdQY3QsIE9CUCwgU08sIEhSKSAlPiUKICBncm91cF9ieShwbGF5ZXJJRCkgJT4lCiAgc3VtbWFyaXNlX2FsbCgnbWVhbicpCmBgYAoKYGBge3J9CmNhcmVlckJhdHRpbmdfbnVtIDwtIGNhcmVlckJhdHRpbmcgJT4lCiAgZmlsdGVyKFBBID49IDE1MCkgJT4lCiAgc2VsZWN0KEJBLCBQQSwgU2x1Z1BjdCwgT0JQLCBTTywgSFIpCgpwYWlycyhjYXJlZXJCYXR0aW5nX251bSkKYGBgCmBgYHtyfQpjb3JybWF0cml4IDwtIGNvcihiYXR0aW5nX251bSkKY29ycnBsb3QoY29ycm1hdHJpeCwgbWV0aG9kID0gJ251bWJlcicpICNHaXZlcyB1cyBjb3JyZWxhdGlvbiBmcm9tIHBhaXJzIGdyYXBoLgpgYGAKCmBgYHtyfQpjYXJlZXJCYXR0aW5nX251bTEgPC0gY2FyZWVyQmF0dGluZ19udW0gJT4lCiAgZmlsdGVyKFBBID4gNTAwKQpgYGAKCgojIyAwLWRpbWVuc2lvbmFsIFJlZHVjdGlvbiAoTGVzc29uIDQpCgoKIyMjIyBCb290c3RyYXBwaW5nCgojIyBQQ0EgKExlc3NvbiA0KQpgYGB7cn0KcmVzIDwtIGJhdHRpbmdfbnVtICU+JSBwcmNvbXAoc2NhbGUgPSBUUlVFKQpyZXMKYGBgCgpgYGB7cn0KbG9hZGluZ3MgPC0gcmVzJHJvdGF0aW9uCmxvYWRpbmdzCmBgYAoKYGBge3J9CnNjb3JlX21hdCA8LSByZXMkeApzY29yZV9tYXQKYGBgCgoKYGBge3J9CmdldF9laWcocmVzKQpgYGAKCiMjIyMgU2NyZWVwbG90CmBgYHtyfQpnZXRfZWlnKHJlcykgJT4lCiAgZ2dwbG90KGFlcyh4ID0gMTo2LCB5ID0gY3VtdWxhdGl2ZS52YXJpYW5jZS5wZXJjZW50KSkgKwogIGdlb21fbGluZSgpICsKICBnZW9tX3BvaW50KCkgKwogIGdlb21faGxpbmUoeWludGVyY2VwdCA9IDgwKSArCiAgeGxhYigiUHJpbmNpcGFsIENvbXBvbmVudCIpICsKICB5bGFiKCJQcm9wb3J0aW9uIG9mIFZhcmlhbmNlIEV4cGxhaW5lZCIpICsKICBnZ3RpdGxlKCJTY3JlZSBQbG90IG9mIFByaW5jaXBhbCBDb21wb25lbnQgZm9yIEJhdHRpbmcgU3RhdGlzdGljcyIpCmBgYAoKMiBQcmluY2lwYWwgQ29tcG9uZW50czogUEMxIGFuZCBQQzIKCmBgYHtyfQpmdml6X3NjcmVlcGxvdChyZXMsIG1haW4gPSAiU2NyZWUgUGxvdCIpCmBgYAoKQ2FuIElkZW50aWZ5IGFuIGVsYm93IGluIDMuCgojIyMjIEJpcGxvdApgYGB7cn0KcmVzICU+JQogIGZ2aXpfcGNhX3ZhcihheGVzID0gYygxLDIpLAogICAgICAgICAgICAgICBjb2wudmFyID0gImNvbnRyaWIiLAogICAgICAgICAgICAgICBncmFkaWVudC5jb2xzID0gYygiIzAwQUZCQiIsICIjRTdCODAwIiwgIiNGQzRFMDciKSwKICAgICAgICAgICAgICAgcmVwZWwgPSBUUlVFCiAgICAgICAgICAgICAgICkKYGBgCgoKIyMgQ2x1c3RlciBBbmFseXNpcyAoTGVzc29uIDUpCmBgYHtyfQojTk9UIENPTVBMRVRFISEhISEgVGhpcyB3YXMganVzdCBhIHRlc3QsIGJzdGF0cyBpcyB3YXkgdG9vIGJpZy4KYnN0YXRzX2Jlc3QgPC0gYnN0YXRzICU+JQogIGZpbHRlcihQQSA+PSA2MDApCgpldV9kaXN0IDwtIGdldF9kaXN0KGNhcmVlckJhdHRpbmdfbnVtMSwgbWV0aG9kID0gJ2V1Y2xpZGVhbicpCmBgYAoKYGBge3J9CmhjX2NvbXBsZXRlIDwtIGhjbHVzdChldV9kaXN0LCBtZXRob2QgPSAnY29tcGxldGUnKQoKcGxvdChoY19jb21wbGV0ZSkKYGBgCgojIyMjIFNpbGhvdWV0dGUKCmBgYHtyfQpyZXNfdGVzdCA8LSBjYXJlZXJCYXR0aW5nX251bTEgJT4lIGttZWFucyg3KQogIHN0cihyZXNfdGVzdCkKYGBgCgoKYGBge3J9CmRpc3RhbmNlIDwtIGdldF9kaXN0KGNhcmVlckJhdHRpbmdfbnVtMSwgbWV0aG9kID0gImV1Y2xpZGVhbiIpCnNpbCA8LSBzaWxob3VldHRlKHggPSByZXNfdGVzdCRjbHVzdGVyLCBkaXN0ID0gZGlzdGFuY2UpCnN1bW1hcnkoc2lsKQpzaWwgJT4lIGhlYWQoKQpgYGAKCmBgYHtyfQpmdml6X3NpbGhvdWV0dGUoc2lsKQpgYGAKCmBgYHtyfQpmdml6X25iY2x1c3QoY2FyZWVyQmF0dGluZ19udW0xLCBoY3V0LCBoY19tZXRob2QgPSAiY29tcGxldGUiLCBoY19tZXRyaWMgPSAiZXVjbGlkZWFuIiwgbWV0aG9kID0gIndzcyIpCmBgYAoKYGBge3J9CiMjVGhpcyBpcyB0byB0ZXN0IG90aGVyIHZhbHVlcyBvZiBLIGZvciB0aGUgc2lsaG91ZXR0ZSBtZXRob2QuCnJlc190ZXN0MSA8LSBjYXJlZXJCYXR0aW5nX251bTEgJT4lIGttZWFucygxMCApCiAgc3RyKHJlc190ZXN0MSkKYGBgCgoKYGBge3J9CmRpc3RhbmNlIDwtIGdldF9kaXN0KGNhcmVlckJhdHRpbmdfbnVtMSwgbWV0aG9kPSJldWNsaWRlYW4iKQpzaWwgPC0gc2lsaG91ZXR0ZSh4ID0gcmVzX3Rlc3QxJGNsdXN0ZXIsIGRpc3QgPSBkaXN0YW5jZSkKc3VtbWFyeShzaWwpCnNpbCAlPiUgaGVhZCgpCmBgYAoKYGBge3J9CmZ2aXpfc2lsaG91ZXR0ZShzaWwpCmBgYAoKCiMjIyMgRGlhbmEKCiMjIExpbmVhciBSZWdyZXNzaW9uIChMZXNzb24gNikKCkxpbmVhciBSZWdyZXNzaW9uIGNvbXBhcmluZyB0ZWFtIHBheXJvbGwgYW5kIHdpbiByYXRlLgpgYGB7cn0KdGVhbXMgPSBhcy5kYXRhLnRhYmxlKFRlYW1zKQp0ZWFtcyA9IHRlYW1zWywgLih5ZWFySUQsCiAgICAgICAgICAgICAgICAgIGxnSUQgPSBhcy5jaGFyYWN0ZXIobGdJRCksCiAgICAgICAgICAgICAgICAgIHRlYW1JRCA9IGFzLmNoYXJhY3Rlcih0ZWFtSUQpLAogICAgICAgICAgICAgICAgICBmcmFuY2hJRCA9IGFzLmNoYXJhY3RlcihmcmFuY2hJRCksCiAgICAgICAgICAgICAgICAgIFJhbmssIEcsIFcsIEwsIFIsIEVSQSwgU08sCiAgICAgICAgICAgICAgICAgIFdpblBlcmNlbnQgPSBXLyhXK0wpKV0KCnNhbGFyaWVzID0gYXMuZGF0YS50YWJsZShTYWxhcmllcykKc2FsYXJpZXMgPSBzYWxhcmllc1ssIGMoImxnSUQiLCAidGVhbUlEIiwgInNhbGFyeTFNIikgOj0KICAgICAgICAgICAgICAgICAgICAgIGxpc3QoYXMuY2hhcmFjdGVyKGxnSUQpLCBhcy5jaGFyYWN0ZXIodGVhbUlEKSwgc2FsYXJ5IC8gMWU2TCldCnBheXJvbGwgPSBzYWxhcmllc1ssIC4ocGF5cm9sbCA9IHN1bShzYWxhcnkxTSkpLCBieT0uKHRlYW1JRCwgeWVhcklEKV0KdGVhbVBheXJvbGwgPSBtZXJnZSh0ZWFtcywgcGF5cm9sbCwgYnkgPSBjKCJ0ZWFtSUQiLCAieWVhcklEIikpCmBgYAoKYGBge3J9CmdncGxvdChkYXRhID0gdGVhbVBheXJvbGwsIGFlcyh4ID0gcGF5cm9sbCwgeSA9IFdpblBlcmNlbnQpKSArIGdlb21fcG9pbnQoKSAgKyBsYWJzKHggPSAiUGF5cm9sbCAoaW4gbWlsbGlvbnMpIiwgeSA9ICJXaW4gUGVyY2VudGFnZSIpICsKICBnZW9tX3Ntb290aChtZXRob2QgPSBsbSwgc2UgPSBGQUxTRSkKCmBgYApgYGB7cn0KbW9kX2xtIDwtIGxtKGRhdGEgPSB0ZWFtUGF5cm9sbCwgV2luUGVyY2VudH5wYXlyb2xsKQptb2RfbG0KYGBgCgpgYGB7cn0Kc3VtbWFyeShtb2RfbG0pCmBgYApgYGB7cn0KcGF5cm9sbF9wcmVkIDwtIHRlYW1QYXlyb2xsICU+JQogIGFkZF9wcmVkaWN0aW9ucyhtb2RfbG0pCgpwYXlyb2xsX3ByZWQgJT4lCiAgZmlsdGVyKHllYXJJRCA+PSAyMDEwKSAlPiUKICBhcnJhbmdlKGRlc2MocHJlZCkpICU+JQogIGhlYWQoMjUpCmBgYApgYGB7cn0KcGF5cm9sbF9wcmVkICU+JQogIGZpbHRlcih5ZWFySUQgPj0gMjAxMCkgJT4lCiAgYXJyYW5nZShkZXNjKFdpblBlcmNlbnQpKSAlPiUKICBoZWFkKDI1KQpgYGAKT25seSBmaXZlIHRlYW1zIGFyZSBpbiB0aGUgdG9wIDI1IG9mIGJvdGggcGF5cm9sbCBhbmQgd2luIHBlcmNlbnRhZ2UgaW4gdGhlIDIwMTBzLiBUaGVzZSB0ZWFtcyBhcmUgdGhlIDIwMTEgUGhpbGxpZXMsIDIwMTEgWWFua2VlcywgMjAxMCBZYW5rZWVzLCAyMDEyIFlhbmtlZXMsIGFuZCAyMDE2IFJhbmdlcnMuIFRoaXMgc2hvd3MgdGhhdCBzcGVuZGluZyB0aGUgbW9zdCBtb25leSBkb2Vzbid0IGF1dG9tYXRpY2FsbHkgbWVhbiB5b3UgYXJlIGdldHRpbmcgdGhlIGJlc3QgcHJvZHVjdCBvbiB0aGUgZmllbGQuCiMjIFNpbXBsZSBMaW5lYXIgUmVncmVzc2lvbgoKIyMgTXVsdGlwbGUgTGluZWFyIFJlZ3Jlc3Npb24KYGBge3J9CmJzdGF0c19zYWxhcnkgPC0gYnN0YXRzX3NhbGFyeSAlPiUKICBmaWx0ZXIoUEEgPj0gMTAwKSAlPiUKICBmaWx0ZXIoc2FsYXJ5ID4gNTAwMDAwKQpgYGAKCgpgYGB7cn0KbG1fbW9kIDwtIGxtKHNhbGFyeSB+IEgsIEhSLCBkYXRhID0gYnN0YXRzX3NhbGFyeSkKc3VtbWFyeShsbV9tb2QpCmBgYApgYGB7cn0KbG1fbW9kX3ByZCA8LSBic3RhdHNfc2FsYXJ5ICU+JSBhZGRfcHJlZGljdGlvbnMobG1fbW9kKQpsbV9tb2RfcHJkCmBgYApgYGB7cn0KZnVsbF9tb2RlbCA8LSBsbShzYWxhcnkgfi4sIGRhdGEgPSBic3RhdHNfc3VyZSkKc3VtbWFyeShmdWxsX21vZGVsKQpgYGAKYGBge3J9CmZ1bGxfbW9kZWxfcHJlZCA8LSBic3RhdHNfc3VyZSAlPiUgYWRkX3ByZWRpY3Rpb25zKGZ1bGxfbW9kZWwpCmZ1bGxfbW9kZWxfcHJlZApgYGAKYGBge3J9CmFkdl9zdGF0X21vZCA8LSBsbShzYWxhcnkgfiBPUFMsIGRhdGEgPSBic3RhdHNfc2FsYXJ5KQpzdW1tYXJ5KGFkdl9zdGF0X21vZCkKYGBgCgoKIyMgUmVzYW1wbGluZyBNZXRob2RzCgpgYGB7cn0KI2luY2x1ZGluZyAyMDAyIGFuZCB1cCBiZWNhdXNlIHNhbGFyeSBiZWNvbWVzIGhpZ2hlcgpic3RhdHNfc2FsYXJ5XzIxY2VudHVyeSA8LSBic3RhdHNfc2FsYXJ5ICU+JQogIGZpbHRlcih5ZWFySUQgPj0gMjAwMikKYGBgCgoKYGBge3J9CmJzdGF0c19zYWxhcnlfMjFjZW50dXJ5ICU+JSBoZWFkKDEwKQpgYGAKCgoKYGBge3J9CiMgc2V0dGluZyBzZWVkIHRvIGdlbmVyYXRlIGEgcmVwcm9kdWNpYmxlIHJhbmRvbSBzYW1wbGluZwpzZXQuc2VlZCgxMjMpCiAKIyBkZWZpbmluZyB0cmFpbmluZyBjb250cm9sIGFzIGNyb3NzLXZhbGlkYXRpb24gYW5kIHZhbHVlIG9mIEsgZXF1YWwgdG8gMTAKdHJhaW5fY29udHJvbCA8LSB0cmFpbkNvbnRyb2wobWV0aG9kID0gImN2IiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbnVtYmVyID0gMTApCgojIHRyYWluaW5nIHRoZSBtb2RlbAptb2RlbCA8LSB0cmFpbihzYWxhcnkgfiBPQlAsIGRhdGEgPSBic3RhdHNfc2FsYXJ5XzIxY2VudHVyeSwKICAgICAgICAgICAgICAgbWV0aG9kID0gImxtIiwKICAgICAgICAgICAgICAgdHJDb250cm9sID0gdHJhaW5fY29udHJvbCkKCnByaW50KG1vZGVsKQpgYGAKCgojIyBGZWF0dXJlIFNlbGVjdGlvbgpgYGB7cn0KYnN0YXRzX3NhbGFyeV9udW12YXJzIDwtIGJzdGF0c19zYWxhcnlfMjFjZW50dXJ5ICU+JSAKICBzZWxlY3QoYyg2OjMyKSkKYGBgCgpgYGB7cn0KcmVnZml0LmZ1bGwgPSByZWdzdWJzZXRzKHNhbGFyeSB+LiwgZGF0YSA9IGJzdGF0c19zYWxhcnlfbnVtdmFycywgIG52bWF4ID0gMTMsIG1ldGhvZD0iZXhoYXVzdGl2ZSIpCnN1bW1hcnkocmVnZml0LmZ1bGwpCmBgYAoKYGBge3J9CnN1bW1hcnkocmVnZml0LmZ1bGwpJHJzcQpgYGAKCgoKYGBge3J9CnBsb3Qoc3VtbWFyeShyZWdmaXQuZnVsbCkkcnNxKQpgYGAKCmBgYHtyfQpyZWcuc3VtbWFyeSA8LSBzdW1tYXJ5KHJlZ2ZpdC5mdWxsKSAjZ2V0IHRoZSBzdW1tYXJ5CgpwYXIobWZyb3c9YygyLDIpKQojcnNzIHBsb3QgLSAgTk9UIFVTRUZVTApwbG90KHJlZy5zdW1tYXJ5JHJzcyAseGxhYj0iTnVtYmVyIG9mIFZhcmlhYmxlcyAiLHlsYWI9IlJTUyIsdHlwZT0ibCIpCgojYWRqcjIgcGxvdApwbG90KHJlZy5zdW1tYXJ5JGFkanIyICx4bGFiPSJOdW1iZXIgb2YgVmFyaWFibGVzICIsIHlsYWI9IkFkanVzdGVkIFJTcSIsdHlwZT0ibCIpCgptYXhfYWRqcjIgPC0gd2hpY2gubWF4KHJlZy5zdW1tYXJ5JGFkanIyKQpwb2ludHMobWF4X2FkanIyLHJlZy5zdW1tYXJ5JGFkanIyW21heF9hZGpyMl0sIGNvbD0icmVkIixjZXg9MixwY2g9MjApCgojIEFJQyBjcml0ZXJpb24gKENwKSB0byBtaW5pbWl6ZQpwbG90KHJlZy5zdW1tYXJ5JGNwICx4bGFiPSJOdW1iZXIgb2YgVmFyaWFibGVzICIseWxhYj0iQ3AiLCB0eXBlPSdsJykKCm1pbl9jcCA8LSB3aGljaC5taW4ocmVnLnN1bW1hcnkkY3AgKQpwb2ludHMobWluX2NwLCByZWcuc3VtbWFyeSRjcFttaW5fY3BdLGNvbD0icmVkIixjZXg9MixwY2g9MjApCgojIEJJQyBjcml0ZXJpb24gdG8gbWluaW1pemUKcGxvdChyZWcuc3VtbWFyeSRiaWMgLHhsYWI9Ik51bWJlciBvZiBWYXJpYWJsZXMgIix5bGFiPSJCSUMiLHR5cGU9J2wnKQoKbWluX2JpYyA8LSB3aGljaC5taW4ocmVnLnN1bW1hcnkkYmljKQpwb2ludHMobWluX2JpYyxyZWcuc3VtbWFyeSRiaWNbbWluX2JpY10sY29sPSJyZWQiLGNleD0yLHBjaD0yMCkKYGBgCgpgYGB7cn0KI0ZvcndhcmQgc3RlcHdpc2Ugc2VsZWN0aW9uCnJlZ2ZpdC5md2QgPSByZWdzdWJzZXRzKHNhbGFyeSB+LiAsIGRhdGE9YnN0YXRzX3NhbGFyeV9udW12YXJzLCBudm1heD0xMywgbWV0aG9kID0iZm9yd2FyZCIpCnN1bW1hcnkocmVnZml0LmZ3ZCkKYGBgCgpgYGB7cn0KcmVnLnN1bW1hcnkgPC0gc3VtbWFyeShyZWdmaXQuZndkKSAjZ2V0IHRoZSBzdW1tYXJ5CgpwYXIobWZyb3c9YygyLDIpKQojcnNzIHBsb3QgLSAgTk9UIFVTRUZVTApwbG90KHJlZy5zdW1tYXJ5JHJzcyAseGxhYj0iTnVtYmVyIG9mIFZhcmlhYmxlcyAiLHlsYWI9IlJTUyIsdHlwZT0ibCIpCgojYWRqcjIgcGxvdApwbG90KHJlZy5zdW1tYXJ5JGFkanIyICx4bGFiPSJOdW1iZXIgb2YgVmFyaWFibGVzICIsIHlsYWI9IkFkanVzdGVkIFJTcSIsdHlwZT0ibCIpCgptYXhfYWRqcjIgPC0gd2hpY2gubWF4KHJlZy5zdW1tYXJ5JGFkanIyKQpwb2ludHMobWF4X2FkanIyLHJlZy5zdW1tYXJ5JGFkanIyW21heF9hZGpyMl0sIGNvbD0icmVkIixjZXg9MixwY2g9MjApCgojIEFJQyBjcml0ZXJpb24gKENwKSB0byBtaW5pbWl6ZQpwbG90KHJlZy5zdW1tYXJ5JGNwICx4bGFiPSJOdW1iZXIgb2YgVmFyaWFibGVzICIseWxhYj0iQ3AiLCB0eXBlPSdsJykKCm1pbl9jcCA8LSB3aGljaC5taW4ocmVnLnN1bW1hcnkkY3AgKQpwb2ludHMobWluX2NwLCByZWcuc3VtbWFyeSRjcFttaW5fY3BdLGNvbD0icmVkIixjZXg9MixwY2g9MjApCgojIEJJQyBjcml0ZXJpb24gdG8gbWluaW1pemUKcGxvdChyZWcuc3VtbWFyeSRiaWMgLHhsYWI9Ik51bWJlciBvZiBWYXJpYWJsZXMgIix5bGFiPSJCSUMiLHR5cGU9J2wnKQoKbWluX2JpYyA8LSB3aGljaC5taW4ocmVnLnN1bW1hcnkkYmljKQpwb2ludHMobWluX2JpYyxyZWcuc3VtbWFyeSRiaWNbbWluX2JpY10sY29sPSJyZWQiLGNleD0yLHBjaD0yMCkKYGBgCgpgYGB7cn0KI0JhY2t3YXJkcyBzdGVwd2lzZSBzZWxlY3Rpb24KcmVnZml0LmJ3ZCA9IHJlZ3N1YnNldHMoc2FsYXJ5IH4uICwgZGF0YT1ic3RhdHNfc2FsYXJ5X251bXZhcnMsbnZtYXg9MTMsIG1ldGhvZCA9ImJhY2t3YXJkIikKc3VtbWFyeShyZWdmaXQuYndkKQpgYGAKCmBgYHtyfQpyZWcuc3VtbWFyeSA8LSBzdW1tYXJ5KHJlZ2ZpdC5id2QpICNnZXQgdGhlIHN1bW1hcnkKCnBhcihtZnJvdz1jKDIsMikpCiNyc3MgcGxvdCAtICBOT1QgVVNFRlVMCnBsb3QocmVnLnN1bW1hcnkkcnNzICx4bGFiPSJOdW1iZXIgb2YgVmFyaWFibGVzICIseWxhYj0iUlNTIix0eXBlPSJsIikKCiNhZGpyMiBwbG90CnBsb3QocmVnLnN1bW1hcnkkYWRqcjIgLHhsYWI9Ik51bWJlciBvZiBWYXJpYWJsZXMgIiwgeWxhYj0iQWRqdXN0ZWQgUlNxIix0eXBlPSJsIikKCm1heF9hZGpyMiA8LSB3aGljaC5tYXgocmVnLnN1bW1hcnkkYWRqcjIpCnBvaW50cyhtYXhfYWRqcjIsIHJlZy5zdW1tYXJ5JGFkanIyW21heF9hZGpyMl0sIGNvbD0icmVkIiwgY2V4PTIsIHBjaD0yMCkKCiMgQUlDIGNyaXRlcmlvbiAoQ3ApIHRvIG1pbmltaXplCnBsb3QocmVnLnN1bW1hcnkkY3AgLHhsYWI9Ik51bWJlciBvZiBWYXJpYWJsZXMgIix5bGFiPSJDcCIsIHR5cGU9J2wnKQoKbWluX2NwIDwtIHdoaWNoLm1pbihyZWcuc3VtbWFyeSRjcCApCnBvaW50cyhtaW5fY3AsIHJlZy5zdW1tYXJ5JGNwW21pbl9jcF0sIGNvbD0icmVkIiwgY2V4PTIsIHBjaD0yMCkKCiMgQklDIGNyaXRlcmlvbiB0byBtaW5pbWl6ZQpwbG90KHJlZy5zdW1tYXJ5JGJpYywgeGxhYj0iTnVtYmVyIG9mIFZhcmlhYmxlcyAiLCB5bGFiPSJCSUMiLCB0eXBlPSdsJykKCm1pbl9iaWMgPC0gd2hpY2gubWluKHJlZy5zdW1tYXJ5JGJpYykKcG9pbnRzKG1pbl9iaWMsIHJlZy5zdW1tYXJ5JGJpY1ttaW5fYmljXSwgY29sPSJyZWQiLCBjZXg9MiwgcGNoPTIwKQpgYGAKCmBgYHtyfQojcmlkZ2UgcmVncmVzc2lvbiAKCiMgZ2V0dGluZyB0aGUgcHJlZGljdG9ycwp4X3ZhciA8LSBic3RhdHNfc2FsYXJ5X251bXZhcnMgJT4lIHNlbGVjdCgtc2FsYXJ5KSAlPiUgYXMubWF0cml4KCkKIyBnZXR0aW5nIHRoZSBpbmRlcGVuZGVudCB2YXJpYWJsZQp5X3ZhciA8LSBic3RhdHNfc2FsYXJ5X251bXZhcnNbLCJzYWxhcnkiXQpgYGAKCmBgYHtyfQpyaWRnZSA8LSBnbG1uZXQoeF92YXIsIHlfdmFyLCBhbHBoYT0wKQpzdW1tYXJ5KHJpZGdlKQpgYGAKCmBgYHtyfQpjdl9yaWRnZSA8LSBjdi5nbG1uZXQoeF92YXIsIHlfdmFyLCBhbHBoYSA9IDApCmN2X3JpZGdlCmBgYAoKYGBge3J9CnBsb3QoY3ZfcmlkZ2UpCmBgYAoKYGBge3J9CmN2X3JpZGdlJGxhbWJkYS5taW4KYGBgCgpgYGB7cn0KY3ZfcmlkZ2UkbGFtYmRhLjFzZQpgYGAKCmBgYHtyfQpsYnNfZnVuIDwtIGZ1bmN0aW9uKGZpdCwgb2Zmc2V0X3g9MSwgLi4uKSB7CiAgTCA8LSBsZW5ndGgoZml0JGxhbWJkYSkKICB4IDwtIGxvZyhmaXQkbGFtYmRhW0xdKSArIG9mZnNldF94CiAgeSA8LSBmaXQkYmV0YVsgLExdCiAgbGFicyA8LSBuYW1lcyh5KQogIHRleHQoeCwgeSwgbGFiZWxzPWxhYnMsIC4uLikKfQoKcGxvdChyaWRnZSwgeHZhciA9ICJsYW1iZGEiLCBsYWJlbD1UKQpsYnNfZnVuKHJpZGdlKSAjIGFkZCBuYW1uZXMKCmFibGluZSh2ID0gbG9nKGN2X3JpZGdlJGxhbWJkYS5taW4pLCBjb2wgPSAicmVkIiwgbHR5PTIpICNsYW1iZGEubWluCmFibGluZSh2ID0gbG9nKGN2X3JpZGdlJGxhbWJkYS4xc2UpLCBjb2w9ImJsdWUiLCBsdHk9MikgICNsYW1iZGEuMXNlCmBgYAoKYGBge3J9Cm1pbl9yaWRnZSA8LSBnbG1uZXQoeF92YXIsIHlfdmFyLCBhbHBoYT0wLCBsYW1iZGEgPSBjdl9yaWRnZSRsYW1iZGEubWluKQpjb2VmKG1pbl9yaWRnZSkKYGBgCgpgYGB7cn0KIyBNYWtlIHByZWRpY3Rpb25zIG9uIHRoZSB0ZXN0IGRhdGEKcHJlZGljdGlvbnMgPC0gbWluX3JpZGdlICU+JSBwcmVkaWN0KHhfdmFyKSAlPiUgYXMudmVjdG9yKCkKCiMgTW9kZWwgcGVyZm9ybWFuY2UgbWV0cmljcwpkYXRhLmZyYW1lKAogIFJNU0UgPSBSTVNFKHByZWRpY3Rpb25zLCB5X3ZhciksCiAgUnNxdWFyZSA9IFIyKHByZWRpY3Rpb25zLCB5X3ZhcikKKQpgYGAKCmBgYHtyfQojIExhc3NvIAoKIyBnZXR0aW5nIHRoZSBwcmVkaWN0b3JzCnhfdmFyIDwtIGJzdGF0c19zYWxhcnlfbnVtdmFycyAlPiUgc2VsZWN0KC1zYWxhcnkpICU+JSBhcy5tYXRyaXgoKQojIGdldHRpbmcgdGhlIGluZGVwZW5kZW50IHZhcmlhYmxlCnlfdmFyIDwtIGJzdGF0c19zYWxhcnlfbnVtdmFyc1ssInNhbGFyeSJdCmBgYAoKCmBgYHtyfQpsYXNzbyA8LSBnbG1uZXQoeF92YXIsIHlfdmFyLCBhbHBoYT0xKQpzdW1tYXJ5KGxhc3NvKQpgYGAKCmBgYHtyfQpjdl9sYXNzbyA8LSBjdi5nbG1uZXQoeF92YXIsIHlfdmFyLCBhbHBoYSA9IDEpCmN2X2xhc3NvCmBgYAoKYGBge3J9CnBsb3QoY3ZfbGFzc28pCmBgYAoKCmBgYHtyfQpsYnNfZnVuIDwtIGZ1bmN0aW9uKGZpdCwgb2Zmc2V0X3g9MSwgLi4uKSB7CiAgTCA8LSBsZW5ndGgoZml0JGxhbWJkYSkKICB4IDwtIGxvZyhmaXQkbGFtYmRhW0xdKSsgb2Zmc2V0X3gKICB5IDwtIGZpdCRiZXRhWywgTF0KICBsYWJzIDwtIG5hbWVzKHkpCiAgdGV4dCh4LCB5LCBsYWJlbHM9bGFicywgLi4uKQp9CnBsb3QobGFzc28sIHh2YXIgPSAibGFtYmRhIiwgbGFiZWw9VCkKbGJzX2Z1bihsYXNzbykKCmFibGluZSh2PWxvZyhjdl9sYXNzbyRsYW1iZGEubWluKSwgY29sID0gInJlZCIsIGx0eT0yKQphYmxpbmUodj1sb2coY3ZfbGFzc28kbGFtYmRhLjFzZSksIGNvbD0iYmx1ZSIsIGx0eT0yKQpgYGAKCmBgYHtyfQptaW5fbGFzc28gPC0gZ2xtbmV0KHhfdmFyLCB5X3ZhciwgYWxwaGE9MSwgbGFtYmRhID0gY3ZfbGFzc28kbGFtYmRhLm1pbikKY29lZihtaW5fbGFzc28pCmBgYAoKYGBge3J9CnNlX2xhc3NvIDwtIGdsbW5ldCh4X3ZhciwgeV92YXIsIGFscGhhPTEsIGxhbWJkYSA9IGN2X2xhc3NvJGxhbWJkYS4xc2UpCmNvZWYoc2VfbGFzc28pCmBgYAoKYGBge3J9CiMgTWFrZSBwcmVkaWN0aW9ucyBvbiB0aGUgdGVzdCBkYXRhCnByZWRpY3Rpb25zIDwtIG1pbl9sYXNzbyAlPiUgcHJlZGljdCh4X3ZhcikgJT4lIGFzLnZlY3RvcigpCiMgTW9kZWwgcGVyZm9ybWFuY2UgbWV0cmljcwpkYXRhLmZyYW1lKAogIFJNU0UgPSBSTVNFKHByZWRpY3Rpb25zLCB5X3ZhciksCiAgUnNxdWFyZSA9IFIyKHByZWRpY3Rpb25zLCB5X3ZhcikKKQpgYGAKCgoKIyMgU2FsYXJ5IERhdGEKYGBge3J9CmZyYW5jaGlzZSA8LSBjKGBBTkFgID0gIkxBQSIsIGBBUklgID0gIkFSSSIsIGBBVExgID0gIkFUTCIsIAogICAgICAgICAgICAgICBgQkFMYCA9ICJCQUwiLCBgQk9TYCA9ICJCT1MiLCBgQ0FMYCA9ICJMQUEiLAogICAgICAgICAgICAgICBgQ0hBYCA9ICJDSEEiLCBgQ0hOYCA9ICJDSE4iLCBgQ0lOYCA9ICJDSU4iLCAKICAgICAgICAgICAgICAgYENMRWAgPSAiQ0xFIiwgYENPTGAgPSAiQ09MIiwgYERFVGAgPSAiREVUIiwgCiAgICAgICAgICAgICAgIGBGTE9gID0gIk1JQSIsIGBIT1VgID0gIkhPVSIsIGBLQ0FgID0gIktDQSIsIAogICAgICAgICAgICAgICBgTEFBYCA9ICJMQUEiLCBgTEFOYCA9ICJMQU4iLCBgTUlBYCA9ICJNSUEiLCAKICAgICAgICAgICAgICAgYE1JTGAgPSAiTUlMIiwgYE1JTmAgPSAiTUlOIiwgYE1MNGAgPSAiTUlMIiwgCiAgICAgICAgICAgICAgIGBNT05gID0gIldBUyIsIGBOWUFgID0gIk5ZQSIsIGBOWU1gID0gIk5ZTiIsIAogICAgICAgICAgICAgICBgTllOYCA9ICJOWU4iLCBgT0FLYCA9ICJPQUsiLCBgUEhJYCA9ICJQSEkiLCAKICAgICAgICAgICAgICAgYFBJVGAgPSAiUElUIiwgYFNETmAgPSAiU0ROIiwgYFNFQWAgPSAiU0VBIiwKICAgICAgICAgICAgICAgYFNGR2AgPSAiU0ZOIiwgYFNGTmAgPSAiU0ZOIiwgYFNMTmAgPSAiU0xOIiwgCiAgICAgICAgICAgICAgIGBUQkFgID0gIlRCQSIsIGBURVhgID0gIlRFWCIsIGBUT1JgID0gIlRPUiIsCiAgICAgICAgICAgICAgIGBXQVNgID0gIldBUyIpCmBgYAoKYGBge3J9ClNhbGFyaWVzJGZyYW5jaGlzZSA8LSB1bm5hbWUoZnJhbmNoaXNlW1NhbGFyaWVzJHRlYW1JRF0pCmBgYAoKCmBgYHtyfQphdmdfdGVhbV9zYWxhcmllcyA8LSBTYWxhcmllcyAlPiUKICAgIGdyb3VwX2J5KHllYXJJRCwgZnJhbmNoaXNlLCBsZ0lEKSAlPiUKICAgIHN1bW1hcmlzZShzYWxhcnkgPSBtZWFuKHNhbGFyeSkvMWU2KSAlPiUKICAgIGZpbHRlcighKGZyYW5jaGlzZSA9PSAiQ0xFIiAmIGxnSUQgPT0gIk5MIikpCmBgYApgYGB7cn0KZ2dwbG90KGF2Z190ZWFtX3NhbGFyaWVzLCAKICAgICAgIGFlcyh4ID0geWVhcklELCB5ID0gc2FsYXJ5LCBncm91cCA9IGZhY3RvcihmcmFuY2hpc2UpKSkgKwogICAgICAgZ2VvbV9wYXRoKCkgKwogICAgICAgbGFicyh4ID0gIlllYXIiLCB5ID0gIkF2ZXJhZ2UgdGVhbSBzYWxhcnkgKG1pbGxpb25zIFVTRCkiKQpgYGAKCgo=